Converted amd64 docs to tsgh-5218

author: Jeremy <51220084+jeremy-rifkin@users.noreply.github.com> 2022-12-02 12:40:18 -0500
committer: Jeremy <51220084+jeremy-rifkin@users.noreply.github.com> 2022-12-02 12:40:18 -0500
commit: 627cbf46b9247611d4841b8d7b2f674e6d9cf3c8 (patch)
tree: 60fcbde0260e78c7888b2b110b55fef9cfb8e26a
parent: 2e844e9ef535e486bb6d661e984074aec4811577 (diff)
download: compiler-explorer-gh-5218.tar.gz
compiler-explorer-gh-5218.zip
7 files changed, 340 insertions, 251 deletions
diff --git a/.eslintignore b/.eslintignore
index 43a2571e7..f9ab09a4b 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -8,5 +8,5 @@ views
 cypress
 
 # Autogenerated files
-lib/asm-docs/generated/asm-docs-*.js
+lib/asm-docs/generated/asm-docs-*
 etc/scripts/docenizer/vendor/jvms.html
diff --git a/.gitattributes b/.gitattributes
index cc5a86526..b1e215510 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,5 +1,5 @@
 docs/*  linguist-documentation
 *.s linguist-generated
 *.asm linguist-generated
-lib/asm-docs/generated/asm-docs-*.js linguist-generated
+lib/asm-docs/generated/asm-docs-* linguist-generated
 test/*-cases/* linguist-generated
diff --git a/.nycrc.yml b/.nycrc.yml
index d93f5ce0f..8a6923e23 100644
--- a/.nycrc.yml
+++ b/.nycrc.yml
@@ -3,7 +3,7 @@ include:
   - lib/**/*.js
   - lib/**/*.ts
 exclude:
-  - lib/asm-docs/generated/asm-docs-*.js
+  - lib/asm-docs/generated/asm-docs-*
   - lib/compilers/fake-for-test.js
   - lib/**/*.d.ts
 report-dir: ./out/coverage
diff --git a/.prettierignore b/.prettierignore
index f4aa01431..964f22e9e 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -38,7 +38,7 @@ examples
 out
 
 # Autogenerated files
-lib/asm-docs/generated/asm-docs-*.js
+lib/asm-docs/generated/asm-docs-*
 
 
 #########################
diff --git a/etc/scripts/docenizers/.gitignore b/etc/scripts/docenizers/.gitignore
new file mode 100644
index 000000000..db38717f6
--- /dev/null
+++ b/etc/scripts/docenizers/.gitignore
@@ -0,0 +1 @@
+asm-docs
diff --git a/etc/scripts/docenizers/docenizer-amd64.py b/etc/scripts/docenizers/docenizer-amd64.py
index 91c78f376..c1bba19ca 100755
--- a/etc/scripts/docenizers/docenizer-amd64.py
+++ b/etc/scripts/docenizers/docenizer-amd64.py
@@ -19,8 +19,8 @@ parser = argparse.ArgumentParser(description='Docenizes HTML version of the offi
 parser.add_argument('-i', '--inputfolder', type=str,
                     help='Folder where the input files reside as .html. Default is ./asm-docs/',
                     default='asm-docs')
-parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .js file. Default is ./asm-docs.js',
-                    default='./asm-docs.js')
+parser.add_argument('-o', '--outputpath', type=str, help='Final path of the .ts file. Default is ./asm-docs.ts',
+                    default='./asm-docs-amd64.ts')
 parser.add_argument('-d', '--downloadfolder', type=str,
                     help='Folder where the archive will be downloaded and extracted', default='asm-docs')
 
@@ -354,10 +354,12 @@ def main():
     print(f"Writing {len(instructions)} instructions")
     with open(args.outputpath, 'w') as f:
         f.write("""
-export function getAsmOpcode(opcode) {
+import {AssemblyInstructionInfo} from '../base';
+
+export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined {
     if (!opcode) return;
     switch (opcode.toUpperCase()) {
-""")
+""".lstrip())
         for inst in instructions:
             for name in sorted(inst.names):
                 f.write(f'        case "{name}":\n')
diff --git a/lib/asm-docs/generated/asm-docs-amd64.js b/lib/asm-docs/generated/asm-docs-amd64.ts
index 2ca3545f3..9bbb3e13d 100644
--- a/lib/asm-docs/generated/asm-docs-amd64.js
+++ b/lib/asm-docs/generated/asm-docs-amd64.ts
@@ -1,5 +1,6 @@
+import {AssemblyInstructionInfo} from '../base';
 
-export function getAsmOpcode(opcode) {
+export function getAsmOpcode(opcode: string | undefined): AssemblyInstructionInfo | undefined {
     if (!opcode) return;
     switch (opcode.toUpperCase()) {
         case "AAA":
@@ -46,7 +47,7 @@ export function getAsmOpcode(opcode) {
 
         case "ADD":
             return {
-                "html": "<p>Adds the destination operand (first operand) and the source operand (second operand) and then stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, a register, or a memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.</p><p>The ADD instruction performs integer addition. It evaluates the result for both signed and unsigned integer operands and sets the CF and OF flags to indicate a carry (overflow) in the signed or unsigned result, respectively. The SF flag indicates the sign of the signed result.</p><p>This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically.</p><p>In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Using a REX prefix in the form of REX.R permits access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.</p>",
+                "html": "<p>Adds the destination operand (first operand) and the source operand (second operand) and then stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, a register, or a memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.</p><p>The ADD instruction performs integer addition. It evaluates the result for both signed and unsigned integer operands and sets the OF and CF flags to indicate a carry (overflow) in the signed or unsigned result, respectively. The SF flag indicates the sign of the signed result.</p><p>This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically.</p><p>In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Using a REX prefix in the form of REX.R permits access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.</p>",
                 "tooltip": "Adds the destination operand (first operand) and the source operand (second operand) and then stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, a register, or a memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.",
                 "url": "http://www.felixcloutier.com/x86/ADD.html"
             };
@@ -54,16 +55,16 @@ export function getAsmOpcode(opcode) {
         case "ADDPD":
         case "VADDPD":
             return {
-                "html": "<p>Add two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point results in the destination operand.</p><p>EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.</p>",
-                "tooltip": "Add two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point results in the destination operand.",
+                "html": "<p>Adds two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point result in the destination operand.</p><p>EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.</p>",
+                "tooltip": "Adds two, four or eight packed double-precision floating-point values from the first source operand to the second source operand, and stores the packed double-precision floating-point result in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/ADDPD.html"
             };
 
         case "ADDPS":
         case "VADDPS":
             return {
-                "html": "<p>Add four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point results in the destination operand.</p><p>EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.</p>",
-                "tooltip": "Add four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point results in the destination operand.",
+                "html": "<p>Adds four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point result in the destination operand.</p><p>EVEX encoded versions: The first source operand is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>VEX.256 encoded version: The first source operand is a YMM register. The second source operand can be a YMM register or a 256-bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: the first source operand is a XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper Bits (MAXVL-1:128) of the corresponding ZMM register destination are unmodified.</p>",
+                "tooltip": "Adds four, eight or sixteen packed single-precision floating-point values from the first source operand with the second source operand, and stores the packed single-precision floating-point result in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/ADDPS.html"
             };
 
@@ -109,32 +110,32 @@ export function getAsmOpcode(opcode) {
         case "AESDEC":
         case "VAESDEC":
             return {
-                "html": "<p>This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.</p><p>Use the AESDEC instruction for all but the last decryption round. For the last decryption round, use the AESDECLAST instruction.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
-                "tooltip": "This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.",
+                "html": "<p>This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.</p><p>Use the AESDEC instruction for all but the last decryption round. For the last decryption round, use the AESDECLAST instruction.</p><p>VEX and EVEX encoded versions of the instruction allow 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.</p><p>The EVEX encoded form of this instruction does not support memory fault suppression.</p>",
+                "tooltip": "This instruction performs a single round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/AESDEC.html"
             };
 
         case "AESDECLAST":
         case "VAESDECLAST":
             return {
-                "html": "<p>This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
-                "tooltip": "This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, with the round key from the second source operand, operating on a 128-bit data (state) from the first source operand, and store the result in the destination operand.",
+                "html": "<p>This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.</p><p>VEX and EVEX encoded versions of the instruction allow 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.</p><p>The EVEX encoded form of this instruction does not support memory fault suppression.</p>",
+                "tooltip": "This instruction performs the last round of the AES decryption flow using the Equivalent Inverse Cipher, using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/AESDECLAST.html"
             };
 
         case "AESENC":
         case "VAESENC":
             return {
-                "html": "<p>This instruction performs a single round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.</p><p>Use the AESENC instruction for all but the last encryption rounds. For the last encryption round, use the AESENCCLAST instruction.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
-                "tooltip": "This instruction performs a single round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.",
+                "html": "<p>This instruction performs a single round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.</p><p>Use the AESENC instruction for all but the last encryption rounds. For the last encryption round, use the AESENCCLAST instruction.</p><p>VEX and EVEX encoded versions of the instruction allow 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.</p><p>The EVEX encoded form of this instruction does not support memory fault suppression.</p>",
+                "tooltip": "This instruction performs a single round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/AESENC.html"
             };
 
         case "AESENCLAST":
         case "VAESENCLAST":
             return {
-                "html": "<p>This instruction performs the last round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
-                "tooltip": "This instruction performs the last round of an AES encryption flow using a round key from the second source operand, operating on 128-bit data (state) from the first source operand, and store the result in the destination operand.",
+                "html": "<p>This instruction performs the last round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.</p><p>VEX and EVEX encoded versions of the instruction allows 3-operand (non-destructive) operation. The legacy encoded versions of the instruction require that the first source operand and the destination operand are the same and must be an XMM register.</p><p>The EVEX encoded form of this instruction does not support memory fault suppression.</p>",
+                "tooltip": "This instruction performs the last round of an AES encryption flow using one/two/four (depending on vector length) 128-bit data (state) from the first source operand with one/two/four (depending on vector length) round key(s) from the second source operand, and stores the result in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/AESENCLAST.html"
             };
 
@@ -319,7 +320,7 @@ export function getAsmOpcode(opcode) {
 
         case "BSF":
             return {
-                "html": "<p>Searches the source operand (second operand) for the least significant set bit (1 bit). If a least significant 1 bit is found, its bit index is stored in the destination operand (first operand). The source operand can be a register or a memory location; the destination operand is a register. The bit index is an unsigned offset from bit 0 of the source operand. If the content of the source operand is 0, the content of the destination operand is undefined.</p><p>In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Using a REX prefix in the form of REX.R permits access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.</p>",
+                "html": "<p>Searches the source operand (second operand) for the least significant set bit (1 bit). If a least significant 1 bit is found, its bit index is stored in the destination operand (first operand). The source operand can be a register or a memory location; the destination operand is a register. The bit index is an unsigned offset from bit 0 of the source operand. If the content of the source operand is 0, the content of the destination operand is undefined.</p><p>In 64-bit mode, the instruction\u2019s default operation size access to additional registers (R8-R15). Using a REX prefix in the form of REX.W promotes operation to 64 bits. See the summary chart at the beginning of this section for encoding data and limits.</p>",
                 "tooltip": "Searches the source operand (second operand) for the least significant set bit (1 bit). If a least significant 1 bit is found, its bit index is stored in the destination operand (first operand). The source operand can be a register or a memory location; the destination operand is a register. The bit index is an unsigned offset from bit 0 of the source operand. If the content of the source operand is 0, the content of the destination operand is undefined.",
                 "url": "http://www.felixcloutier.com/x86/BSF.html"
             };
@@ -419,7 +420,7 @@ export function getAsmOpcode(opcode) {
 
         case "CLFLUSH":
             return {
-                "html": "<p>Invalidates from every level of the cache hierarchy in the cache coherence domain the cache line that contains the linear address specified with the memory operand. If that cache line contains modified data at any level of the cache hierarchy, that data is written back to memory. The source operand is a byte memory location.</p><p>The availability of CLFLUSH is indicated by the presence of the CPUID feature flag CLFSH (CPUID.01H:EDX[bit 19]). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).</p><p>The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCH<em>h</em> instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLFLUSH instruction is not ordered with respect to PREFETCH<em>h</em> instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLFLUSH instruction that references the cache line).</p><p>Executions of the CLFLUSH instruction are ordered with respect to each other and with respect to writes, locked read-modify-write instructions, fence instructions, and executions of CLFLUSHOPT to the same cache line.<sup>1</sup> They are not ordered with respect to executions of CLFLUSHOPT to different cache lines.</p><p>The CLFLUSH instruction can be used at all privilege levels and is subject to all permission checking and faults associated with a byte load (and in addition, a CLFLUSH instruction is allowed to flush a linear address in an execute-only segment). Like a load, the CLFLUSH instruction sets the A bit but not the D bit in the page tables.</p>",
+                "html": "<p>Invalidates from every level of the cache hierarchy in the cache coherence domain the cache line that contains the linear address specified with the memory operand. If that cache line contains modified data at any level of the cache hierarchy, that data is written back to memory. The source operand is a byte memory location.</p><p>The availability of CLFLUSH is indicated by the presence of the CPUID feature flag CLFSH (CPUID.01H:EDX[bit 19]). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).</p><p>The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCH<em>h</em> instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLFLUSH instruction is not ordered with respect to PREFETCH<em>h</em> instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLFLUSH instruction that references the cache line).</p><p>Executions of the CLFLUSH instruction are ordered with respect to each other and with respect to writes, locked read-modify-write instructions, and fence instructions.<sup>1</sup> They are not ordered with respect to executions of CLFLUSHOPT and CLWB. Software can use the SFENCE instruction to order an execution of CLFLUSH relative to one of those operations.</p><p>The CLFLUSH instruction can be used at all privilege levels and is subject to all permission checking and faults associated with a byte load (and in addition, a CLFLUSH instruction is allowed to flush a linear address in an execute-only segment). Like a load, the CLFLUSH instruction sets the A bit but not the D bit in the page tables.</p>",
                 "tooltip": "Invalidates from every level of the cache hierarchy in the cache coherence domain the cache line that contains the linear address specified with the memory operand. If that cache line contains modified data at any level of the cache hierarchy, that data is written back to memory. The source operand is a byte memory location.",
                 "url": "http://www.felixcloutier.com/x86/CLFLUSH.html"
             };
@@ -431,6 +432,13 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/CLI.html"
             };
 
+        case "CLRSSBSY":
+            return {
+                "html": "<p>Clear busy flag in supervisor shadow stack token reference by m64. Subsequent to marking the shadow stack as not busy the SSP is loaded with value 0.</p>",
+                "tooltip": "Clear busy flag in supervisor shadow stack token reference by m64. Subsequent to marking the shadow stack as not busy the SSP is loaded with value 0.",
+                "url": "http://www.felixcloutier.com/x86/CLRSSBSY.html"
+            };
+
         case "CLTS":
             return {
                 "html": "<p>Clears the task-switched (TS) flag in the CR0 register. This instruction is intended for use in operating-system procedures. It is a privileged instruction that can only be executed at a CPL of 0. It is allowed to be executed in real-address mode to allow initialization for protected mode.</p><p>The processor sets the TS flag every time a task switch occurs. The flag is used to synchronize the saving of FPU context in multitasking applications. See the description of the TS flag in the section titled \u201cControl Registers\u201d in Chapter 2 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A</em>, for more information about this flag.</p><p>CLTS operation is the same in non-64-bit modes and 64-bit mode.</p><p>See Chapter 25, \u201cVMX Non-Root Operation,\u201d of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C</em>, for more information about the behavior of this instruction in VMX non-root operation.</p>",
@@ -440,7 +448,7 @@ export function getAsmOpcode(opcode) {
 
         case "CLWB":
             return {
-                "html": "<p>Writes back to memory the cache line (if modified) that contains the linear address specified with the memory operand from any level of the cache hierarchy in the cache coherence domain. The line may be retained in the cache hierarchy in non-modified state. Retaining the line in the cache hierarchy is a performance optimization (treated as a hint by hardware) to reduce the possibility of cache miss on a subsequent access. Hardware may choose to retain the line at any of the levels in the cache hierarchy, and in some cases, may invalidate the line from the cache hierarchy. The source operand is a byte memory location.</p><p>The availability of CLWB instruction is indicated by the presence of the CPUID feature flag CLWB (bit 24 of the EBX register, see \u201cCPUID \u2014 CPU Identification\u201d in this chapter). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).</p><p>The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions that are assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCHh instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLWB instruction is not ordered with respect to PREFETCHh instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLWB instruction that references the cache line).</p><p>CLWB instruction is ordered only by store-fencing operations. For example, software can use an SFENCE, MFENCE, XCHG, or LOCK-prefixed instructions to ensure that previous stores are included in the write-back. CLWB instruction need not be ordered by another CLWB or CLFLUSHOPT instruction. CLWB is implicitly ordered with older stores executed by the logical processor to the same address.</p><p>For usages that require only writing back modified data from cache lines to memory (do not require the line to be invalidated), and expect to subsequently access the data, software is recommended to use CLWB (with appropriate fencing) instead of CLFLUSH or CLFLUSHOPT for improved performance.</p>",
+                "html": "<p>Writes back to memory the cache line (if modified) that contains the linear address specified with the memory operand from any level of the cache hierarchy in the cache coherence domain. The line may be retained in the cache hierarchy in non-modified state. Retaining the line in the cache hierarchy is a performance optimization (treated as a hint by hardware) to reduce the possibility of cache miss on a subsequent access. Hardware may choose to retain the line at any of the levels in the cache hierarchy, and in some cases, may invalidate the line from the cache hierarchy. The source operand is a byte memory location.</p><p>The availability of CLWB instruction is indicated by the presence of the CPUID feature flag CLWB (bit 24 of the EBX register, see \u201cCPUID \u2014 CPU Identification\u201d in this chapter). The aligned cache line size affected is also indicated with the CPUID instruction (bits 8 through 15 of the EBX register when the initial value in the EAX register is 1).</p><p>The memory attribute of the page containing the affected line has no effect on the behavior of this instruction. It should be noted that processors are free to speculatively fetch and cache data from system memory regions that are assigned a memory-type allowing for speculative reads (such as, the WB, WC, and WT memory types). PREFETCHh instructions can be used to provide the processor with hints for this speculative behavior. Because this speculative fetching can occur at any time and is not tied to instruction execution, the CLWB instruction is not ordered with respect to PREFETCHh instructions or any of the speculative fetching mechanisms (that is, data can be speculatively loaded into a cache line just before, during, or after the execution of a CLWB instruction that references the cache line).</p><p>Executions of the CLWB instruction are ordered with respect to fence instructions and to locked read-modify-write instructions; they are also ordered with respect to older writes to the cache line being written back. They are not ordered with respect to other executions of CLWB, to executions of CLFLUSH and CLFLUSHOPT, or to younger writes to the cache line being written back. Software can use the SFENCE instruction to order an execution of CLWB relative to one of those operations.</p><p>For usages that require only writing back modified data from cache lines to memory (do not require the line to be invalidated), and expect to subsequently access the data, software is recommended to use CLWB (with appropriate fencing) instead of CLFLUSH or CLFLUSHOPT for improved performance.</p>",
                 "tooltip": "Writes back to memory the cache line (if modified) that contains the linear address specified with the memory operand from any level of the cache hierarchy in the cache coherence domain. The line may be retained in the cache hierarchy in non-modified state. Retaining the line in the cache hierarchy is a performance optimization (treated as a hint by hardware) to reduce the possibility of cache miss on a subsequent access. Hardware may choose to retain the line at any of the levels in the cache hierarchy, and in some cases, may invalidate the line from the cache hierarchy. The source operand is a byte memory location.",
                 "url": "http://www.felixcloutier.com/x86/CLWB.html"
             };
@@ -483,8 +491,8 @@ export function getAsmOpcode(opcode) {
         case "CMOVS":
         case "CMOVZ":
             return {
-                "html": "<p>The CMOV<em>cc</em> instructions check the state of one or more of the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) and perform a move operation if the flags are in a specified state (or condition). A condition code (<em>cc</em>) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOV<em>cc</em> instruction.</p><p>These instructions can move 16-bit, 32-bit or 64-bit values from memory to a general-purpose register or from one general-purpose register to another. Conditional moves of 8-bit register operands are not supported.</p><p>The condition for each CMOV<em>cc</em> mnemonic is given in the description column of the above table. The terms \u201cless\u201d and \u201cgreater\u201d are used for comparisons of signed integers and the terms \u201cabove\u201d and \u201cbelow\u201d are used for unsigned integers.</p><p>Because a particular state of the status flags can sometimes be interpreted in two ways, two mnemonics are defined for some opcodes. For example, the CMOVA (conditional move if above) instruction and the CMOVNBE (conditional move if not below or equal) instruction are alternate mnemonics for the opcode 0F 47H.</p><p>The CMOV<em>cc</em> instructions were introduced in P6 family processors; however, these instructions may not be supported by all IA-32 processors. Software can determine if the CMOV<em>cc</em> instructions are supported by checking the processor\u2019s feature information with the CPUID instruction (see \u201cCPUID\u2014CPU Identification\u201d in this chapter).</p>",
-                "tooltip": "The CMOVcc instructions check the state of one or more of the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) and perform a move operation if the flags are in a specified state (or condition). A condition code (cc) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOVcc instruction.",
+                "html": "<p>Each of the CMOVcc instructions performs a move operation if the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) are in a specified state (or condition). A condition code (<em>cc</em>) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOV<em>cc</em> instruction.</p><p>Specifically, CMOVcc loads data from its source operand into a temporary register unconditionally (regardless of the condition code and the status flags in the EFLAGS register). If the condition code associated with the instruction (cc) is satisfied, the data in the temporary register is then copied into the instruction's destination operand.</p><p>These instructions can move 16-bit, 32-bit or 64-bit values from memory to a general-purpose register or from one general-purpose register to another. Conditional moves of 8-bit register operands are not supported.</p><p>The condition for each CMOV<em>cc</em> mnemonic is given in the description column of the above table. The terms \u201cless\u201d and \u201cgreater\u201d are used for comparisons of signed integers and the terms \u201cabove\u201d and \u201cbelow\u201d are used for unsigned integers.</p><p>Because a particular state of the status flags can sometimes be interpreted in two ways, two mnemonics are defined for some opcodes. For example, the CMOVA (conditional move if above) instruction and the CMOVNBE (conditional move if not below or equal) instruction are alternate mnemonics for the opcode 0F 47H.</p>",
+                "tooltip": "Each of the CMOVcc instructions performs a move operation if the status flags in the EFLAGS register (CF, OF, PF, SF, and ZF) are in a specified state (or condition). A condition code (cc) is associated with each instruction to indicate the condition being tested for. If the condition is not satisfied, a move is not performed and execution continues with the instruction following the CMOVcc instruction.",
                 "url": "http://www.felixcloutier.com/x86/CMOVcc.html"
             };
 
@@ -498,16 +506,16 @@ export function getAsmOpcode(opcode) {
         case "CMPPD":
         case "VCMPPD":
             return {
-                "html": "<p>Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.</p><p>EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).</p><p>VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Four comparisons are performed with results written to the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Two comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. Two comparisons are performed with results written to bits 127:0 of the destination operand.</p>",
-                "tooltip": "Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.",
+                "html": "<p>Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.</p><p>EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).</p><p>VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Four comparisons are performed with results written to the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Two comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a quadword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. Two comparisons are performed with results written to bits 127:0 of the destination operand.</p>",
+                "tooltip": "Performs a SIMD compare of the packed double-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each pair of packed values in the two source operands.",
                 "url": "http://www.felixcloutier.com/x86/CMPPD.html"
             };
 
         case "CMPPS":
         case "VCMPPS":
             return {
-                "html": "<p>Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.</p><p>EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).</p><p>VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Eight comparisons are performed with results written to the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Four comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. Four comparisons are performed with results written to bits 127:0 of the destination operand.</p>",
-                "tooltip": "Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.",
+                "html": "<p>Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.</p><p>EVEX encoded versions: The first source operand (second operand) is a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand (first operand) is an opmask register. Comparison results are written to the destination operand under the writemask k2. Each comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false).</p><p>VEX.256 encoded version: The first source operand (second operand) is a YMM register. The second source operand (third operand) can be a YMM register or a 256-bit memory location. The destination operand (first operand) is a YMM register. Eight comparisons are performed with results written to the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding ZMM destination register remain unchanged. Four comparisons are performed with results written to bits 127:0 of the destination operand. The result of each comparison is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destina-</p>",
+                "tooltip": "Performs a SIMD compare of the packed single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate byte) specifies the type of comparison performed on each of the pairs of packed values.",
                 "url": "http://www.felixcloutier.com/x86/CMPPS.html"
             };
 
@@ -525,8 +533,8 @@ export function getAsmOpcode(opcode) {
         case "CMPSS":
         case "VCMPSS":
             return {
-                "html": "<p>Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.</p><p>128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 32-bit memory location. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 32-bit memory location. The result is stored in the low 32 bits of the destination operand; bits 128:32 of the destination operand are copied from the first source operand. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>EVEX encoded version: The first source operand (second operand) is an XMM register. The second source operand can be a XMM register or a 32-bit memory location. The destination operand (first operand) is an opmask register. The comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false), written to the destination starting from the LSB according to the writemask k2. Bits (MAX_KL-1:128) of the destination register are cleared.</p><p>The comparison predicate operand is an 8-bit immediate:</p>",
-                "tooltip": "Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the results of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.",
+                "html": "<p>Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.</p><p>128-bit Legacy SSE version: The first source and destination operand (first operand) is an XMM register. The second source operand (second operand) can be an XMM register or 32-bit memory location. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>VEX.128 encoded version: The first source operand (second operand) is an XMM register. The second source operand (third operand) can be an XMM register or a 32-bit memory location. The result is stored in the low 32 bits of the destination operand; bits 127:32 of the destination operand are copied from the first source operand. Bits (MAXVL-1:128) of the destination ZMM register are zeroed. The comparison result is a doubleword mask of all 1s (comparison true) or all 0s (comparison false).</p><p>EVEX encoded version: The first source operand (second operand) is an XMM register. The second source operand can be a XMM register or a 32-bit memory location. The destination operand (first operand) is an opmask register. The comparison result is a single mask bit of 1 (comparison true) or 0 (comparison false), written to the destination starting from the LSB according to the writemask k2. Bits (MAX_KL-1:128) of the destination register are cleared.</p><p>The comparison predicate operand is an 8-bit immediate:</p>",
+                "tooltip": "Compares the low single-precision floating-point values in the second source operand and the first source operand and returns the result of the comparison to the destination operand. The comparison predicate operand (immediate operand) specifies the type of comparison performed.",
                 "url": "http://www.felixcloutier.com/x86/CMPSS.html"
             };
 
@@ -539,7 +547,7 @@ export function getAsmOpcode(opcode) {
 
         case "CMPXCHG8B":
             return {
-                "html": "<p>Compares the 64-bit value in EDX:EAX (or 128-bit value in RDX:RAX if operand size is 128 bits) with the operand (destination operand). If the values are equal, the 64-bit value in ECX:EBX (or 128-bit value in RCX:RBX) is stored in the destination operand. Otherwise, the value in the destination operand is loaded into EDX:EAX (or RDX:RAX). The destination operand is an 8-byte memory location (or 16-byte memory location if operand size is 128 bits). For the EDX:EAX and ECX:EBX register pairs, EDX and ECX contain the high-order 32 bits and EAX and EBX contain the low-order 32 bits of a 64-bit value. For the RDX:RAX and RCX:RBX register pairs, RDX and RCX contain the high-order 64 bits and RAX and RBX contain the low-order 64bits of a 128-bit value.</p><p>This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically. To simplify the interface to the processor\u2019s bus, the destination operand receives a write cycle without regard to the result of the comparison. The destination operand is written back if the comparison fails; otherwise, the source operand is written into the destination. (The processor never produces a locked read without also producing a locked write.)</p><p>In 64-bit mode, default operation size is 64 bits. Use of the REX.W prefix promotes operation to 128 bits. Note that CMPXCHG16B requires that the destination (memory) operand be 16-byte aligned. See the summary chart at the beginning of this section for encoding data and limits. For information on the CPUID flag that indicates CMPXCHG16B, see page 3-213.</p><p>This instruction encoding is not supported on Intel processors earlier than the Pentium processors.</p>",
+                "html": "<p>Compares the 64-bit value in EDX:EAX (or 128-bit value in RDX:RAX if operand size is 128 bits) with the operand (destination operand). If the values are equal, the 64-bit value in ECX:EBX (or 128-bit value in RCX:RBX) is stored in the destination operand. Otherwise, the value in the destination operand is loaded into EDX:EAX (or RDX:RAX). The destination operand is an 8-byte memory location (or 16-byte memory location if operand size is 128 bits). For the EDX:EAX and ECX:EBX register pairs, EDX and ECX contain the high-order 32 bits and EAX and EBX contain the low-order 32 bits of a 64-bit value. For the RDX:RAX and RCX:RBX register pairs, RDX and RCX contain the high-order 64 bits and RAX and RBX contain the low-order 64bits of a 128-bit value.</p><p>This instruction can be used with a LOCK prefix to allow the instruction to be executed atomically. To simplify the interface to the processor\u2019s bus, the destination operand receives a write cycle without regard to the result of the comparison. The destination operand is written back if the comparison fails; otherwise, the source operand is written into the destination. (The processor never produces a locked read without also producing a locked write.)</p><p>In 64-bit mode, default operation size is 64 bits. Use of the REX.W prefix promotes operation to 128 bits. Note that CMPXCHG16B requires that the destination (memory) operand be 16-byte aligned. See the summary chart at the beginning of this section for encoding data and limits. For information on the CPUID flag that indicates CMPXCHG16B, see page 3-237.</p><p>This instruction encoding is not supported on Intel processors earlier than the Pentium processors.</p>",
                 "tooltip": "Compares the 64-bit value in EDX:EAX (or 128-bit value in RDX:RAX if operand size is 128 bits) with the operand (destination operand). If the values are equal, the 64-bit value in ECX:EBX (or 128-bit value in RCX:RBX) is stored in the destination operand. Otherwise, the value in the destination operand is loaded into EDX:EAX (or RDX:RAX). The destination operand is an 8-byte memory location (or 16-byte memory location if operand size is 128 bits). For the EDX:EAX and ECX:EBX register pairs, EDX and ECX contain the high-order 32 bits and EAX and EBX contain the low-order 32 bits of a 64-bit value. For the RDX:RAX and RCX:RBX register pairs, RDX and RCX contain the high-order 64 bits and RAX and RBX contain the low-order 64bits of a 128-bit value.",
                 "url": "http://www.felixcloutier.com/x86/CMPXCHG8B%3ACMPXCHG16B.html"
             };
@@ -547,7 +555,7 @@ export function getAsmOpcode(opcode) {
         case "COMISD":
         case "VCOMISD":
             return {
-                "html": "<p>Compares the double-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).</p><p>Operand 1 is an XMM register; operand 2 can be an XMM register or a 64 bit memory</p><p>location. The COMISD instruction differs from the UCOMISD instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISD instruction signals an invalid numeric exception only if a source operand is an SNaN.</p><p>The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.</p><p>VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "html": "<p>Compares the double-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).</p><p>Operand 1 is an XMM register; operand 2 can be an XMM register or a 64 bit memory location. The COMISD instruction differs from the UCOMISD instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISD instruction signals an invalid operation exception only if a source operand is an SNaN.</p><p>The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.</p><p>VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p><p>Software should ensure VCOMISD is encoded with VEX.L=0. Encoding VCOMISD with VEX.L=1 may encounter unpredictable behavior across different processor generations.</p>",
                 "tooltip": "Compares the double-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).",
                 "url": "http://www.felixcloutier.com/x86/COMISD.html"
             };
@@ -555,7 +563,7 @@ export function getAsmOpcode(opcode) {
         case "COMISS":
         case "VCOMISS":
             return {
-                "html": "<p>Compares the single-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).</p><p>Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.</p><p>The COMISS instruction differs from the UCOMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISS instruction signals an invalid numeric exception only if a source operand is an SNaN.</p><p>The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.</p><p>VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "html": "<p>Compares the single-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).</p><p>Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.</p><p>The COMISS instruction differs from the UCOMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) when a source operand is either a QNaN or SNaN. The UCOMISS instruction signals an invalid operation exception only if a source operand is an SNaN.</p><p>The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.</p><p>VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
                 "tooltip": "Compares the single-precision floating-point values in the low quadwords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).",
                 "url": "http://www.felixcloutier.com/x86/COMISS.html"
             };
@@ -593,7 +601,7 @@ export function getAsmOpcode(opcode) {
         case "CVTPD2DQ":
         case "VCVTPD2DQ":
             return {
-                "html": "<p>Converts packed double-precision floating-point values in the source operand (second operand) to packed signed doubleword integers in the destination operand (first operand).</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w-1</sup>, where w represents the number of bits in the destination format) is returned.</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1. The upper bits (MAXVL-1:256/128/64) of the corresponding destination are zeroed.</p><p>VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:64) of the corresponding ZMM register destination are zeroed.</p>",
+                "html": "<p>Converts packed double-precision floating-point values in the source operand (second operand) to packed signed doubleword integers in the destination operand (first operand).</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w</sup>-1, where w represents the number of bits in the destination format) is returned.</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1. The upper bits (MAXVL-1:256/128/64) of the corresponding destination are zeroed.</p><p>VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:64) of the corresponding ZMM register destination are zeroed.</p>",
                 "tooltip": "Converts packed double-precision floating-point values in the source operand (second operand) to packed signed doubleword integers in the destination operand (first operand).",
                 "url": "http://www.felixcloutier.com/x86/CVTPD2DQ.html"
             };
@@ -630,7 +638,7 @@ export function getAsmOpcode(opcode) {
         case "CVTPS2DQ":
         case "VCVTPS2DQ":
             return {
-                "html": "<p>Converts four, eight or sixteen packed single-precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w-1</sup>, where w represents the number of bits in the destination format) is returned.</p><p>EVEX encoded versions: The source operand is a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated with writemask k1.</p><p>VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p>",
+                "html": "<p>Converts four, eight or sixteen packed single-precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w</sup>-1, where w represents the number of bits in the destination format) is returned.</p><p>EVEX encoded versions: The source operand is a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated with writemask k1.</p><p>VEX.256 encoded version: The source operand is a YMM register or 256- bit memory location. The destination operand is a YMM register. The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 encoded version: The source operand is an XMM register or 128- bit memory location. The destination operand is a XMM register. The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p>",
                 "tooltip": "Converts four, eight or sixteen packed single-precision floating-point values in the source operand to four, eight or sixteen signed doubleword integers in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/CVTPS2DQ.html"
             };
@@ -693,8 +701,8 @@ export function getAsmOpcode(opcode) {
         case "CVTSS2SI":
         case "VCVTSS2SI":
             return {
-                "html": "<p>Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w-1</sup>, where w represents the number of bits in the destination format) is returned.</p><p>Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to produce 64-bit data. See the summary chart at the beginning of this section for encoding data and limits.</p><p>VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.</p><p>Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
-                "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.",
+                "html": "<p>Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w</sup>-1, where w represents the number of bits in the destination format) is returned.</p><p>Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to produce 64-bit data. See the summary chart at the beginning of this section for encoding data and limits.</p><p>VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.</p><p>Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.",
                 "url": "http://www.felixcloutier.com/x86/CVTSS2SI.html"
             };
 
@@ -739,8 +747,8 @@ export function getAsmOpcode(opcode) {
         case "CVTTSS2SI":
         case "VCVTTSS2SI":
             return {
-                "html": "<p>Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.</p><p>When a conversion is inexact, a truncated (round toward zero) result is returned. If a converted result is larger than the maximum signed doubleword integer, the floating-point invalid exception is raised. If this exception is masked, the indefinite integer value (80000000H or 80000000_00000000H if operand size is 64 bits) is returned.</p><p>Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to 64-bit operation. See the summary chart at the beginning of this section for encoding data and limits.</p><p>VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.</p><p>Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
-                "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed double-word integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.",
+                "html": "<p>Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.</p><p>When a conversion is inexact, a truncated (round toward zero) result is returned. If a converted result is larger than the maximum signed doubleword integer, the floating-point invalid exception is raised. If this exception is masked, the indefinite integer value (80000000H or 80000000_00000000H if operand size is 64 bits) is returned.</p><p>Legacy SSE instructions: In 64-bit mode, Use of the REX.W prefix promotes the instruction to 64-bit operation. See the summary chart at the beginning of this section for encoding data and limits.</p><p>VEX.W1 and EVEX.W1 versions: promotes the instruction to produce 64-bit data in 64-bit mode.</p><p>Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "tooltip": "Converts a single-precision floating-point value in the source operand (the second operand) to a signed doubleword integer (or signed quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 32-bit memory location. The destination operand is a general purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.",
                 "url": "http://www.felixcloutier.com/x86/CVTTSS2SI.html"
             };
 
@@ -829,6 +837,13 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/DPPS.html"
             };
 
+        case "EAX":
+            return {
+                "html": "<p>This leaf function copies a page from regular main memory to the EPC. As part of the copying process, the page is cryptographically authenticated and decrypted. This instruction can only be executed when current privilege level is 0.</p><p>The ELDB leaf function sets the BLOCK bit in the EPCM entry for the destination page in the EPC after copying. The ELDU leaf function clears the BLOCK bit in the EPCM entry for the destination page in the EPC after copying.</p><p>RBX contains the effective address of a PAGEINFO structure; RCX contains the effective address of the destination EPC page; RDX holds the effective address of the version array slot that holds the version of the page.</p><p>The ELDBC/ELDUC leafs are very similar to ELDB and ELDU. They provide an error code on the concurrency conflict for any of the pages which need to acquire a lock. These include the destination, SECS, and VA slot.</p><p>The table below provides additional information on the memory parameter of ELDB/ELDU leaf functions.</p>",
+                "tooltip": "This leaf function copies a page from regular main memory to the EPC. As part of the copying process, the page is cryptographically authenticated and decrypted. This instruction can only be executed when current privilege level is 0.",
+                "url": "http://www.felixcloutier.com/x86/ELDB%3AELDU%3AELDBC%3AELDUC.html"
+            };
+
         case "EMMS":
             return {
                 "html": "<p>Sets the values of all the tags in the x87 FPU tag word to empty (all 1s). This operation marks the x87 FPU data registers (which are aliased to the MMX technology registers) as available for use by x87 FPU floating-point instructions. (See <span class=\"not-imported\">Figure 8-7</span> in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for the format of the x87 FPU tag word.) All other MMX instructions (other than the EMMS instruction) set all the tags in x87 FPU tag word to valid (all 0s).</p><p>The EMMS instruction must be used to clear the MMX technology state at the end of all MMX technology procedures or subroutines and before calling other procedures or subroutines that may execute x87 floating-point instructions. If a floating-point instruction loads one of the registers in the x87 FPU data register stack before the x87 FPU tag word has been reset by the EMMS instruction, an x87 floating-point register stack overflow can occur that will result in an x87 floating-point exception or incorrect result.</p><p>EMMS operation is the same in non-64-bit modes and 64-bit mode.</p>",
@@ -843,6 +858,20 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/ENCLV.html"
             };
 
+        case "ENDBR32":
+            return {
+                "html": "<p>Terminate an indirect branch in 32 bit and compatibility mode.</p>",
+                "tooltip": "Terminate an indirect branch in 32 bit and compatibility mode.",
+                "url": "http://www.felixcloutier.com/x86/ENDBR32.html"
+            };
+
+        case "ENDBR64":
+            return {
+                "html": "<p>Terminate an indirect branch in 64 bit mode.</p>",
+                "tooltip": "Terminate an indirect branch in 64 bit mode.",
+                "url": "http://www.felixcloutier.com/x86/ENDBR64.html"
+            };
+
         case "ENTER":
             return {
                 "html": "<p>Creates a stack frame (comprising of space for dynamic storage and 1-32 frame pointer storage) for a procedure. The first operand (imm16) specifies the size of the dynamic storage in the stack frame (that is, the number of bytes of dynamically allocated on the stack for the procedure). The second operand (imm8) gives the lexical nesting level (0 to 31) of the procedure. The nesting level (imm8 mod 32) and the OperandSize attribute determine the size in bytes of the storage space for frame pointers.</p><p>The nesting level determines the number of frame pointers that are copied into the \u201cdisplay area\u201d of the new stack frame from the preceding frame. The default size of the frame pointer is the StackAddrSize attribute, but can be overridden using the 66H prefix. Thus, the OperandSize attribute determines the size of each frame pointer that will be copied into the stack frame and the data being transferred from SP/ESP/RSP register into the BP/EBP/RBP register.</p><p>The ENTER and companion LEAVE instructions are provided to support block structured languages. The ENTER instruction (when used) is typically the first instruction in a procedure and is used to set up a new stack frame for a procedure. The LEAVE instruction is then used at the end of the procedure (just before the RET instruction) to release the stack frame.</p><p>If the nesting level is 0, the processor pushes the frame pointer from the BP/EBP/RBP register onto the stack, copies the current stack pointer from the SP/ESP/RSP register into the BP/EBP/RBP register, and loads the SP/ESP/RSP register with the current stack-pointer value minus the value in the size operand. For nesting levels of 1 or greater, the processor pushes additional frame pointers on the stack before adjusting the stack pointer. These additional frame pointers provide the called procedure with access points to other nested frames on the stack. See \u201cProcedure Calls for Block-Structured Languages\u201d in Chapter 6 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for more information about the actions of the ENTER instruction.</p><p>The ENTER instruction causes a page fault whenever a write using the final value of the stack pointer (within the current stack segment) would do so.</p>",
@@ -858,20 +887,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/EXTRACTPS.html"
             };
 
-        case "F2XM1":
-            return {
-                "html": "<p>Computes the exponential value of 2 to the power of the source operand minus 1. The source operand is located in register ST(0) and the result is also stored in ST(0). The value of the source operand must lie in the range \u20131.0 to +1.0. If the source value is outside this range, the result is undefined.</p><p>The following table shows the results obtained when computing the exponential value of various classes of numbers, assuming that neither overflow nor underflow occurs.</p><p>Values other than 2 can be exponentiated using the following formula:</p>",
-                "tooltip": "Computes the exponential value of 2 to the power of the source operand minus 1. The source operand is located in register ST(0) and the result is also stored in ST(0). The value of the source operand must lie in the range \u20131.0 to +1.0. If the source value is outside this range, the result is undefined.",
-                "url": "http://www.felixcloutier.com/x86/F2XM1.html"
-            };
-
-        case "FABS":
-            return {
-                "html": "<p>Clears the sign bit of ST(0) to create the absolute value of the operand. The following table shows the results obtained when creating the absolute value of various classes of numbers.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Clears the sign bit of ST(0) to create the absolute value of the operand. The following table shows the results obtained when creating the absolute value of various classes of numbers.",
-                "url": "http://www.felixcloutier.com/x86/FABS.html"
-            };
-
         case "FADD":
         case "FADDP":
         case "FIADD":
@@ -888,24 +903,10 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FBLD.html"
             };
 
-        case "FBSTP":
-            return {
-                "html": "<p>Converts the value in the ST(0) register to an 18-digit packed BCD integer, stores the result in the destination operand, and pops the register stack. If the source value is a non-integral value, it is rounded to an integer value, according to rounding mode specified by the RC field of the FPU control word. To pop the register stack, the processor marks the ST(0) register as empty and increments the stack pointer (TOP) by 1.</p><p>The destination operand specifies the address where the first byte destination value is to be stored. The BCD value (including its sign bit) requires 10 bytes of space in memory.</p><p>The following table shows the results obtained when storing various classes of numbers in packed BCD format.</p><p>If the converted value is too large for the destination format, or if the source operand is an \u221e, SNaN, QNAN, or is in an unsupported format, an invalid-arithmetic-operand condition is signaled. If the invalid-operation exception is not masked, an invalid-arithmetic-operand exception (#IA) is generated and no value is stored in the destination operand. If the invalid-operation exception is masked, the packed BCD indefinite value is stored in memory.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Converts the value in the ST(0) register to an 18-digit packed BCD integer, stores the result in the destination operand, and pops the register stack. If the source value is a non-integral value, it is rounded to an integer value, according to rounding mode specified by the RC field of the FPU control word. To pop the register stack, the processor marks the ST(0) register as empty and increments the stack pointer (TOP) by 1.",
-                "url": "http://www.felixcloutier.com/x86/FBSTP.html"
-            };
-
-        case "FCHS":
-            return {
-                "html": "<p>Complements the sign bit of ST(0). This operation changes a positive value into a negative value of equal magnitude or vice versa. The following table shows the results obtained when changing the sign of various classes of numbers.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Complements the sign bit of ST(0). This operation changes a positive value into a negative value of equal magnitude or vice versa. The following table shows the results obtained when changing the sign of various classes of numbers.",
-                "url": "http://www.felixcloutier.com/x86/FCHS.html"
-            };
-
         case "FCLEX":
         case "FNCLEX":
             return {
-                "html": "<p>Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), the exception summary status flag (ES), the stack fault flag (SF), and the busy flag (B) in the FPU status word. The FCLEX instruction checks for and handles any pending unmasked floating-point exceptions before clearing the exception flags; the FNCLEX instruction does not.</p><p>The assembler issues two instructions for the FCLEX instruction (an FWAIT instruction followed by an FNCLEX instruction), and the processor executes each of these instructions separately. If an exception is generated for either of these instructions, the save EIP points to the instruction that caused the exception.</p><p>When operating a Pentium or Intel486 processor in MS-DOS* compatibility mode, it is possible (under unusual circumstances) for an FNCLEX instruction to be interrupted prior to being executed to handle a pending FPU exception. See the section titled \u201cNo-Wait FPU Instructions Can Get FPU Interrupt in Window\u201d in Appendix D of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for a description of these circumstances. An FNCLEX instruction cannot be interrupted in this way on later Intel processors, except for the Intel Quark<sup>TM</sup> X1000 processor.</p><p>This instruction affects only the x87 FPU floating-point exception flags. It does not affect the SIMD floating-point exception flags in the MXCRS register.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
+                "html": "<p>Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), the exception summary status flag (ES), the stack fault flag (SF), and the busy flag (B) in the FPU status word. The FCLEX instruction checks for and handles any pending unmasked floating-point exceptions before clearing the exception flags; the FNCLEX instruction does not.</p><p>The assembler issues two instructions for the FCLEX instruction (an FWAIT instruction followed by an FNCLEX instruction), and the processor executes each of these instructions separately. If an exception is generated for either of these instructions, the save EIP points to the instruction that caused the exception.</p><p>When operating a Pentium or Intel486 processor in MS-DOS* compatibility mode, it is possible (under unusual circumstances) for an FNCLEX instruction to be interrupted prior to being executed to handle a pending FPU exception. See the section titled \u201cNo-Wait FPU Instructions Can Get FPU Interrupt in Window\u201d in Appendix D of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for a description of these circumstances. An FNCLEX instruction cannot be interrupted in this way on later Intel processors, except for the Intel Quark<sup>TM</sup> X1000 processor.</p><p>This instruction affects only the x87 FPU floating-point exception flags. It does not affect the SIMD floating-point exception flags in the MXCSR register.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
                 "tooltip": "Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), the exception summary status flag (ES), the stack fault flag (SF), and the busy flag (B) in the FPU status word. The FCLEX instruction checks for and handles any pending unmasked floating-point exceptions before clearing the exception flags; the FNCLEX instruction does not.",
                 "url": "http://www.felixcloutier.com/x86/FCLEX%3AFNCLEX.html"
             };
@@ -943,20 +944,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FCOMI%3AFCOMIP%3AFUCOMI%3AFUCOMIP.html"
             };
 
-        case "FCOS":
-            return {
-                "html": "<p>Computes the approximate cosine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u22122<sup>63</sup> to +2<sup>63</sup>. The following table shows the results obtained when taking the cosine of various classes of numbers.</p><p>If the source operand is outside the acceptable range, the C2 flag in the FPU status word is set, and the value in register ST(0) remains unchanged. The instruction does not raise an exception when the source operand is out of range. It is up to the program to check the C2 flag for out-of-range conditions. Source values outside the range \u2212 2<sup>63</sup> to +2<sup>63</sup> can be reduced to the range of the instruction by subtracting an appropriate integer multiple of 2\u03c0. However, even within the range -2<sup>63</sup> to +2<sup>63</sup>, inaccurate results can occur because the finite approximation of \u03c0 used internally for argument reduction is not sufficient in all cases. Therefore, for accurate results it is safe to apply FCOS only to arguments reduced accurately in software, to a value smaller in absolute value than 3\u03c0/8. See the sections titled \u201cApproximation of Pi\u201d and \u201cTranscendental Instruction Accuracy\u201d in Chapter 8 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for a discussion of the proper value to use for \u03c0 in performing such reductions.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Computes the approximate cosine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u2212263 to +263. The following table shows the results obtained when taking the cosine of various classes of numbers.",
-                "url": "http://www.felixcloutier.com/x86/FCOS.html"
-            };
-
-        case "FDECSTP":
-            return {
-                "html": "<p>Subtracts one from the TOP field of the FPU status word (decrements the top-of-stack pointer). If the TOP field contains a 0, it is set to 7. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Subtracts one from the TOP field of the FPU status word (decrements the top-of-stack pointer). If the TOP field contains a 0, it is set to 7. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected.",
-                "url": "http://www.felixcloutier.com/x86/FDECSTP.html"
-            };
-
         case "FDIV":
         case "FDIVP":
         case "FIDIV":
@@ -975,17 +962,10 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FDIVR%3AFDIVRP%3AFIDIVR.html"
             };
 
-        case "FFREE":
-            return {
-                "html": "<p>Sets the tag in the FPU tag register associated with register ST(i) to empty (11B). The contents of ST(i) and the FPU stack-top pointer (TOP) are not affected.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Sets the tag in the FPU tag register associated with register ST(i) to empty (11B). The contents of ST(i) and the FPU stack-top pointer (TOP) are not affected.",
-                "url": "http://www.felixcloutier.com/x86/FFREE.html"
-            };
-
         case "FICOM":
         case "FICOMP":
             return {
-                "html": "<p>Compares the value in ST(0) with an integer source operand and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below). The integer value is converted to double extended-precision floating-point format before the comparison is made.</p><p>These instructions perform an \u201cunordered comparison.\u201d An unordered comparison also checks the class of the numbers being compared (see \u201cFXAM\u2014Examine Floating-Point\u201d in this chapter). If either operand is a NaN or is in an undefined format, the condition flags are set to \u201cunordered.\u201d</p><p>The sign of zero is ignored, so that \u20130.0 \u2190 +0.0.</p><p>The FICOMP instructions pop the register stack following the comparison. To pop the register stack, the processor marks the ST(0) register empty and increments the stack pointer (TOP) by 1.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
+                "html": "<p>Compares the value in ST(0) with an integer source operand and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below). The integer value is converted to double extended-precision floating-point format before the comparison is made.</p><p>These instructions perform an \u201cunordered comparison.\u201d An unordered comparison also checks the class of the numbers being compared (see \u201cFXAM\u2014Examine Floating-Point\u201d in this chapter). If either operand is a NaN or is in an undefined format, the condition flags are set to \u201cunordered.\u201d</p><p>The sign of zero is ignored, so that \u20130.0 := +0.0.</p><p>The FICOMP instructions pop the register stack following the comparison. To pop the register stack, the processor marks the ST(0) register empty and increments the stack pointer (TOP) by 1.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
                 "tooltip": "Compares the value in ST(0) with an integer source operand and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below). The integer value is converted to double extended-precision floating-point format before the comparison is made.",
                 "url": "http://www.felixcloutier.com/x86/FICOM%3AFICOMP.html"
             };
@@ -997,13 +977,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FILD.html"
             };
 
-        case "FINCSTP":
-            return {
-                "html": "<p>Adds one to the TOP field of the FPU status word (increments the top-of-stack pointer). If the TOP field contains a 7, it is set to 0. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected. This operation is not equivalent to popping the stack, because the tag for the previous top-of-stack register is not marked empty.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Adds one to the TOP field of the FPU status word (increments the top-of-stack pointer). If the TOP field contains a 7, it is set to 0. The effect of this instruction is to rotate the stack by one position. The contents of the FPU data registers and tag register are not affected. This operation is not equivalent to popping the stack, because the tag for the previous top-of-stack register is not marked empty.",
-                "url": "http://www.felixcloutier.com/x86/FINCSTP.html"
-            };
-
         case "FINIT":
         case "FNINIT":
             return {
@@ -1047,20 +1020,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FLD1%3AFLDL2T%3AFLDL2E%3AFLDPI%3AFLDLG2%3AFLDLN2%3AFLDZ.html"
             };
 
-        case "FLDCW":
-            return {
-                "html": "<p>Loads the 16-bit source operand into the FPU control word. The source operand is a memory location. This instruction is typically used to establish or change the FPU\u2019s mode of operation.</p><p>If one or more exception flags are set in the FPU status word prior to loading a new FPU control word and the new control word unmasks one or more of those exceptions, a floating-point exception will be generated upon execution of the next floating-point instruction (except for the no-wait floating-point instructions, see the section titled \u201cSoftware Exception Handling\u201d in Chapter 8 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>). To avoid raising exceptions when changing FPU operating modes, clear any pending exceptions (using the FCLEX or FNCLEX instruction) before loading the new control word.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Loads the 16-bit source operand into the FPU control word. The source operand is a memory location. This instruction is typically used to establish or change the FPU\u2019s mode of operation.",
-                "url": "http://www.felixcloutier.com/x86/FLDCW.html"
-            };
-
-        case "FLDENV":
-            return {
-                "html": "<p>Loads the complete x87 FPU operating environment from memory into the FPU registers. The source operand specifies the first byte of the operating-environment data in memory. This data is typically written to the specified memory location by a FSTENV or FNSTENV instruction.</p><p>The FPU operating environment consists of the FPU control word, status word, tag word, instruction pointer, data pointer, and last opcode. Figures 8-9 through 8-12 in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, show the layout in memory of the loaded environment, depending on the operating mode of the processor (protected or real) and the current operand-size attribute (16-bit or 32-bit). In virtual-8086 mode, the real mode layouts are used.</p><p>The FLDENV instruction should be executed in the same operating mode as the corresponding FSTENV/FNSTENV instruction.</p><p>If one or more unmasked exception flags are set in the new FPU status word, a floating-point exception will be generated upon execution of the next floating-point instruction (except for the no-wait floating-point instructions, see the section titled \u201cSoftware Exception Handling\u201d in Chapter 8 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>). To avoid generating exceptions when loading a new environment, clear all the exception flags in the FPU status word that is being loaded.</p><p>If a page or limit fault occurs during the execution of this instruction, the state of the x87 FPU registers as seen by the fault handler may be different than the state being loaded from memory. In such situations, the fault handler should ignore the status of the x87 FPU registers, handle the fault, and return. The FLDENV instruction will then complete the loading of the x87 FPU registers with no resulting context inconsistency.</p>",
-                "tooltip": "Loads the complete x87 FPU operating environment from memory into the FPU registers. The source operand specifies the first byte of the operating-environment data in memory. This data is typically written to the specified memory location by a FSTENV or FNSTENV instruction.",
-                "url": "http://www.felixcloutier.com/x86/FLDENV.html"
-            };
-
         case "FIMUL":
         case "FMUL":
         case "FMULP":
@@ -1070,30 +1029,16 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FMUL%3AFMULP%3AFIMUL.html"
             };
 
-        case "FNOP":
-            return {
-                "html": "<p>Performs no FPU operation. This instruction takes up space in the instruction stream but does not affect the FPU or machine context, except the EIP register and the FPU Instruction Pointer.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Performs no FPU operation. This instruction takes up space in the instruction stream but does not affect the FPU or machine context, except the EIP register and the FPU Instruction Pointer.",
-                "url": "http://www.felixcloutier.com/x86/FNOP.html"
-            };
-
-        case "FPATAN":
-            return {
-                "html": "<p>Computes the arctangent of the source operand in register ST(1) divided by the source operand in register ST(0), stores the result in ST(1), and pops the FPU register stack. The result in register ST(0) has the same sign as the source operand ST(1) and a magnitude less than +\u03c0.</p><p>The FPATAN instruction returns the angle between the X axis and the line from the origin to the point (X,Y), where Y (the ordinate) is ST(1) and X (the abscissa) is ST(0). The angle depends on the sign of X and Y independently, not just on the sign of the ratio Y/X. This is because a point (\u2212X,Y) is in the second quadrant, resulting in an angle between \u03c0/2 and \u03c0, while a point (X,\u2212Y) is in the fourth quadrant, resulting in an angle between 0 and \u2212\u03c0/2. A point (\u2212X,\u2212Y) is in the third quadrant, giving an angle between \u2212\u03c0/2 and \u2212\u03c0.</p><p>The following table shows the results obtained when computing the arctangent of various classes of numbers, assuming that underflow does not occur.</p><p>There is no restriction on the range of source operands that FPATAN can accept.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Computes the arctangent of the source operand in register ST(1) divided by the source operand in register ST(0), stores the result in ST(1), and pops the FPU register stack. The result in register ST(0) has the same sign as the source operand ST(1) and a magnitude less than +\u03c0.",
-                "url": "http://www.felixcloutier.com/x86/FPATAN.html"
-            };
-
         case "FPREM":
             return {
-                "html": "<p>Computes the remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or <strong>modulus</strong>), and stores the result in ST(0). The remainder represents the following value:</p><p>Remainder \u2190 ST(0) \u2212 (Q \u2217 ST(1))</p><p>Here, Q is an integer value that is obtained by truncating the floating-point number quotient of [ST(0) / ST(1)] toward zero. The sign of the remainder is the same as the sign of the dividend. The magnitude of the remainder is less than that of the modulus, unless a partial remainder was computed (as described below).</p><p>This instruction produces an exact result; the inexact-result exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.</p><p>When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).</p>",
+                "html": "<p>Computes the remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or <strong>modulus</strong>), and stores the result in ST(0). The remainder represents the following value:</p><p>Remainder := ST(0) \u2212 (Q \u2217 ST(1))</p><p>Here, Q is an integer value that is obtained by truncating the floating-point number quotient of [ST(0) / ST(1)] toward zero. The sign of the remainder is the same as the sign of the dividend. The magnitude of the remainder is less than that of the modulus, unless a partial remainder was computed (as described below).</p><p>This instruction produces an exact result; the inexact-result exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.</p><p>When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).</p>",
                 "tooltip": "Computes the remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value",
                 "url": "http://www.felixcloutier.com/x86/FPREM.html"
             };
 
         case "FPREM1":
             return {
-                "html": "<p>Computes the IEEE remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or <strong>modulus</strong>), and stores the result in ST(0). The remainder represents the following value:</p><p>Remainder \u2190 ST(0) \u2212 (Q \u2217 ST(1))</p><p>Here, Q is an integer value that is obtained by rounding the floating-point number quotient of [ST(0) / ST(1)] toward the nearest integer value. The magnitude of the remainder is less than or equal to half the magnitude of the modulus, unless a partial remainder was computed (as described below).</p><p>This instruction produces an exact result; the precision (inexact) exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.</p><p>When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).</p>",
+                "html": "<p>Computes the IEEE remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or <strong>modulus</strong>), and stores the result in ST(0). The remainder represents the following value:</p><p>Remainder := ST(0) \u2212 (Q \u2217 ST(1))</p><p>Here, Q is an integer value that is obtained by rounding the floating-point number quotient of [ST(0) / ST(1)] toward the nearest integer value. The magnitude of the remainder is less than or equal to half the magnitude of the modulus, unless a partial remainder was computed (as described below).</p><p>This instruction produces an exact result; the precision (inexact) exception does not occur and the rounding control has no effect. The following table shows the results obtained when computing the remainder of various classes of numbers, assuming that underflow does not occur.</p><p>When the result is 0, its sign is the same as that of the dividend. When the modulus is \u221e, the result is equal to the value in ST(0).</p>",
                 "tooltip": "Computes the IEEE remainder obtained from dividing the value in the ST(0) register (the dividend) by the value in the ST(1) register (the divisor or modulus), and stores the result in ST(0). The remainder represents the following value",
                 "url": "http://www.felixcloutier.com/x86/FPREM1.html"
             };
@@ -1105,20 +1050,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FPTAN.html"
             };
 
-        case "FRNDINT":
-            return {
-                "html": "<p>Rounds the source value in the ST(0) register to the nearest integral value, depending on the current rounding mode (setting of the RC field of the FPU control word), and stores the result in ST(0).</p><p>If the source value is \u221e, the value is not changed. If the source value is not an integral value, the floating-point inexact-result exception (#P) is generated.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Rounds the source value in the ST(0) register to the nearest integral value, depending on the current rounding mode (setting of the RC field of the FPU control word), and stores the result in ST(0).",
-                "url": "http://www.felixcloutier.com/x86/FRNDINT.html"
-            };
-
-        case "FRSTOR":
-            return {
-                "html": "<p>Loads the FPU state (operating environment and register stack) from the memory area specified with the source operand. This state data is typically written to the specified memory location by a previous FSAVE/FNSAVE instruction.</p><p>The FPU operating environment consists of the FPU control word, status word, tag word, instruction pointer, data pointer, and last opcode. Figures 8-9 through 8-12 in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, show the layout in memory of the stored environment, depending on the operating mode of the processor (protected or real) and the current operand-size attribute (16-bit or 32-bit). In virtual-8086 mode, the real mode layouts are used. The contents of the FPU register stack are stored in the 80 bytes immediately following the operating environment image.</p><p>The FRSTOR instruction should be executed in the same operating mode as the corresponding FSAVE/FNSAVE instruction.</p><p>If one or more unmasked exception bits are set in the new FPU status word, a floating-point exception will be generated. To avoid raising exceptions when loading a new operating environment, clear all the exception flags in the FPU status word that is being loaded.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Loads the FPU state (operating environment and register stack) from the memory area specified with the source operand. This state data is typically written to the specified memory location by a previous FSAVE/FNSAVE instruction.",
-                "url": "http://www.felixcloutier.com/x86/FRSTOR.html"
-            };
-
         case "FNSAVE":
         case "FSAVE":
             return {
@@ -1127,20 +1058,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FSAVE%3AFNSAVE.html"
             };
 
-        case "FSCALE":
-            return {
-                "html": "<p>Truncates the value in the source operand (toward 0) to an integral value and adds that value to the exponent of the destination operand. The destination and source operands are floating-point values located in registers ST(0) and ST(1), respectively. This instruction provides rapid multiplication or division by integral powers of 2. The following table shows the results obtained when scaling various classes of numbers, assuming that neither overflow nor underflow occurs.</p><p>In most cases, only the exponent is changed and the mantissa (significand) remains unchanged. However, when the value being scaled in ST(0) is a denormal value, the mantissa is also changed and the result may turn out to be a normalized number. Similarly, if overflow or underflow results from a scale operation, the resulting mantissa will differ from the source\u2019s mantissa.</p><p>The FSCALE instruction can also be used to reverse the action of the FXTRACT instruction, as shown in the following example:</p>",
-                "tooltip": "Truncates the value in the source operand (toward 0) to an integral value and adds that value to the exponent of the destination operand. The destination and source operands are floating-point values located in registers ST(0) and ST(1), respectively. This instruction provides rapid multiplication or division by integral powers of 2. The following table shows the results obtained when scaling various classes of numbers, assuming that neither overflow nor underflow occurs.",
-                "url": "http://www.felixcloutier.com/x86/FSCALE.html"
-            };
-
-        case "FSIN":
-            return {
-                "html": "<p>Computes an approximation of the sine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u22122<sup>63</sup> to +2<sup>63</sup>. The following table shows the results obtained when taking the sine of various classes of numbers, assuming that underflow does not occur.</p><p>If the source operand is outside the acceptable range, the C2 flag in the FPU status word is set, and the value in register ST(0) remains unchanged. The instruction does not raise an exception when the source operand is out of range. It is up to the program to check the C2 flag for out-of-range conditions. Source values outside the range \u2212 2<sup>63</sup> to +2<sup>63</sup> can be reduced to the range of the instruction by subtracting an appropriate integer multiple of 2\u03c0. However, even within the range -2<sup>63</sup> to +2<sup>63</sup>, inaccurate results can occur because the finite approximation of \u03c0 used internally for argument reduction is not sufficient in all cases. Therefore, for accurate results it is safe to apply FSIN only to arguments reduced accurately in software, to a value smaller in absolute value than 3\u03c0/4. See the sections titled \u201cApproximation of Pi\u201d and \u201cTranscendental Instruction Accuracy\u201d in Chapter 8 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for a discussion of the proper value to use for \u03c0 in performing such reductions.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Computes an approximation of the sine of the source operand in register ST(0) and stores the result in ST(0). The source operand must be given in radians and must be within the range \u2212263 to +263. The following table shows the results obtained when taking the sine of various classes of numbers, assuming that underflow does not occur.",
-                "url": "http://www.felixcloutier.com/x86/FSIN.html"
-            };
-
         case "FSINCOS":
             return {
                 "html": "<p>Computes both the approximate sine and the cosine of the source operand in register ST(0), stores the sine in ST(0), and pushes the cosine onto the top of the FPU register stack. (This instruction is faster than executing the FSIN and FCOS instructions in succession.)</p><p>The source operand must be given in radians and must be within the range \u22122<sup>63</sup> to +2<sup>63</sup>. The following table shows the results obtained when taking the sine and cosine of various classes of numbers, assuming that underflow does not occur.</p><p>If the source operand is outside the acceptable range, the C2 flag in the FPU status word is set, and the value in register ST(0) remains unchanged. The instruction does not raise an exception when the source operand is out of range. It is up to the program to check the C2 flag for out-of-range conditions. Source values outside the range \u2212 2<sup>63</sup> to +2<sup>63</sup> can be reduced to the range of the instruction by subtracting an appropriate integer multiple of 2\u03c0. However, even within the range -2<sup>63</sup> to +2<sup>63</sup>, inaccurate results can occur because the finite approximation of \u03c0 used internally for argument reduction is not sufficient in all cases. Therefore, for accurate results it is safe to apply FSINCOS only to arguments reduced accurately in software, to a value smaller in absolute value than 3\u03c0/8. See the sections titled \u201cApproximation of Pi\u201d and \u201cTranscendental Instruction Accuracy\u201d in Chapter 8 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for a discussion of the proper value to use for \u03c0 in performing such reductions.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
@@ -1148,13 +1065,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FSINCOS.html"
             };
 
-        case "FSQRT":
-            return {
-                "html": "<p>Computes the square root of the source value in the ST(0) register and stores the result in ST(0).</p><p>The following table shows the results obtained when taking the square root of various classes of numbers, assuming that neither overflow nor underflow occurs.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Computes the square root of the source value in the ST(0) register and stores the result in ST(0).",
-                "url": "http://www.felixcloutier.com/x86/FSQRT.html"
-            };
-
         case "FST":
         case "FSTP":
             return {
@@ -1205,13 +1115,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FSUBR%3AFSUBRP%3AFISUBR.html"
             };
 
-        case "FTST":
-            return {
-                "html": "<p>Compares the value in the ST(0) register with 0.0 and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below).</p><p>This instruction performs an \u201cunordered comparison.\u201d An unordered comparison also checks the class of the numbers being compared (see \u201cFXAM\u2014Examine Floating-Point\u201d in this chapter). If the value in register ST(0) is a NaN or is in an undefined format, the condition flags are set to \u201cunordered\u201d and the invalid operation exception is generated.</p><p>The sign of zero is ignored, so that (\u2013 0.0 \u2190 +0.0).</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Compares the value in the ST(0) register with 0.0 and sets the condition code flags C0, C2, and C3 in the FPU status word according to the results (see table below).",
-                "url": "http://www.felixcloutier.com/x86/FTST.html"
-            };
-
         case "FUCOM":
         case "FUCOMP":
         case "FUCOMPP":
@@ -1221,13 +1124,6 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/FUCOM%3AFUCOMP%3AFUCOMPP.html"
             };
 
-        case "FXAM":
-            return {
-                "html": "<p>Examines the contents of the ST(0) register and sets the condition code flags C0, C2, and C3 in the FPU status word to indicate the class of value or number in the register (see the table below).</p><p>The C1 flag is set to the sign of the value in ST(0), regardless of whether the register is empty or full.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
-                "tooltip": "Examines the contents of the ST(0) register and sets the condition code flags C0, C2, and C3 in the FPU status word to indicate the class of value or number in the register (see the table below).",
-                "url": "http://www.felixcloutier.com/x86/FXAM.html"
-            };
-
         case "FXCH":
             return {
                 "html": "<p>Exchanges the contents of registers ST(0) and ST(i). If no source operand is specified, the contents of ST(0) and ST(1) are exchanged.</p><p>This instruction provides a simple means of moving values in the FPU register stack to the top of the stack [ST(0)], so that they can be operated on by those floating-point instructions that can only operate on values in ST(0). For example, the following instruction sequence takes the square root of the third register from the top of the register stack:</p>",
@@ -1251,25 +1147,46 @@ export function getAsmOpcode(opcode) {
 
         case "FXTRACT":
             return {
-                "html": "<p>Separates the source value in the ST(0) register into its exponent and significand, stores the exponent in ST(0), and pushes the significand onto the register stack. Following this operation, the new top-of-stack register ST(0) contains the value of the original significand expressed as a floating-point value. The sign and significand of this value are the same as those found in the source operand, and the exponent is 3FFFH (biased value for a true exponent of zero). The ST(1) register contains the value of the original operand\u2019s true (unbiased) exponent expressed as a floating-point value. (The operation performed by this instruction is a superset of the IEEE-recommended logb(<em>x</em>) function.)</p><p>This instruction and the F2XM1 instruction are useful for performing power and range scaling operations. The FXTRACT instruction is also useful for converting numbers in double extended-precision floating-point format to decimal representations (e.g., for printing or displaying).</p><p>If the floating-point zero-divide exception (#Z) is masked and the source operand is zero, an exponent value of \u2013\u221e is stored in register ST(1) and 0 with the sign of the source operand is stored in register ST(0).</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
+                "html": "<p>Separates the source value in the ST(0) register into its exponent and significand, stores the exponent in ST(0), and pushes the significand onto the register stack. Following this operation, the new top-of-stack register ST(0) contains the value of the original significand expressed as a floating-point value. The sign and significand of this value are the same as those found in the source operand, and the exponent is 3FFFH (biased value for a true exponent of zero). The ST(1) register contains the value of the original operand\u2019s true (unbiased) exponent expressed as a floating-point value. (The operation performed by this instruction is a superset of the IEEE-recommended logb(<em>x</em>) function.)</p><p>This instruction and the F2XM1 instruction are useful for performing power and range scaling operations. The FXTRACT instruction is also useful for converting numbers in double extended-precision floating-point format to decimal representations (e.g., for printing or displaying).</p><p>If the floating-point zero-divide exception (#Z) is masked and the source operand is zero, an exponent value of \u2013 \u221e is stored in register ST(1) and 0 with the sign of the source operand is stored in register ST(0).</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
                 "tooltip": "Separates the source value in the ST(0) register into its exponent and significand, stores the exponent in ST(0), and pushes the significand onto the register stack. Following this operation, the new top-of-stack register ST(0) contains the value of the original significand expressed as a floating-point value. The sign and significand of this value are the same as those found in the source operand, and the exponent is 3FFFH (biased value for a true exponent of zero). The ST(1) register contains the value of the original operand\u2019s true (unbiased) exponent expressed as a floating-point value. (The operation performed by this instruction is a superset of the IEEE-recommended logb(x) function.)",
                 "url": "http://www.felixcloutier.com/x86/FXTRACT.html"
             };
 
         case "FYL2X":
             return {
-                "html": "<p>Computes (ST(1) \u2217 log<sub>2</sub> (ST(0))), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be a non-zero positive number.</p><p>The following table shows the results obtained when taking the log of various classes of numbers, assuming that neither overflow nor underflow occurs.</p><p>If the divide-by-zero exception is masked and register ST(0) contains \u00b10, the instruction returns \u221e with a sign that is the opposite of the sign of the source operand in register ST(1).</p><p>The FYL2X instruction is designed with a built-in multiplication to optimize the calculation of logarithms with an arbitrary positive base (b):</p><p>log<sub>b</sub>x \u2190 (log<sub>2</sub>b)<sup>\u20131</sup> \u2217 log<sub>2</sub>x</p>",
+                "html": "<p>Computes (ST(1) \u2217 log<sub>2</sub> (ST(0))), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be a non-zero positive number.</p><p>The following table shows the results obtained when taking the log of various classes of numbers, assuming that neither overflow nor underflow occurs.</p><p>If the divide-by-zero exception is masked and register ST(0) contains \u00b10, the instruction returns \u221e with a sign that is the opposite of the sign of the source operand in register ST(1).</p><p>The FYL2X instruction is designed with a built-in multiplication to optimize the calculation of logarithms with an arbitrary positive base (b):</p><p>log<sub>b</sub>x := (log<sub>2</sub>b)<sup>\u20131</sup> \u2217 log<sub>2</sub>x</p>",
                 "tooltip": "Computes (ST(1) \u2217 log2 (ST(0))), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be a non-zero positive number.",
                 "url": "http://www.felixcloutier.com/x86/FYL2X.html"
             };
 
         case "FYL2XP1":
             return {
-                "html": "<p>Computes (ST(1) \u2217 log<sub>2</sub>(ST(0) + 1.0)), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be in the range:</p><p>The source operand in ST(1) can range from \u2212\u221e to +\u221e. If the ST(0) operand is outside of its acceptable range, the result is undefined and software should not rely on an exception being generated. Under some circumstances exceptions may be generated when ST(0) is out of range, but this behavior is implementation specific and not guaranteed.</p><p>The following table shows the results obtained when taking the log epsilon of various classes of numbers, assuming that underflow does not occur.</p><p>This instruction provides optimal accuracy for values of epsilon [the value in register ST(0)] that are close to 0. For small epsilon (\u03b5) values, more significant digits can be retained by using the FYL2XP1 instruction than by using (\u03b5+1) as an argument to the FYL2X instruction. The (\u03b5+1) expression is commonly found in compound interest and annuity calculations. The result can be simply converted into a value in another logarithm base by including a scale factor in the ST(1) source operand. The following equation is used to calculate the scale factor for a particular logarithm base, where n is the logarithm base desired for the result of the FYL2XP1 instruction:</p><p>scale factor \u2190 log<sub>n</sub> 2</p>",
+                "html": "<p>Computes (ST(1) \u2217 log<sub>2</sub>(ST(0) + 1.0)), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be in the range:</p><p>The source operand in ST(1) can range from \u2212\u221e to +\u221e. If the ST(0) operand is outside of its acceptable range, the result is undefined and software should not rely on an exception being generated. Under some circumstances exceptions may be generated when ST(0) is out of range, but this behavior is implementation specific and not guaranteed.</p><p>The following table shows the results obtained when taking the log epsilon of various classes of numbers, assuming that underflow does not occur.</p><p>This instruction provides optimal accuracy for values of epsilon [the value in register ST(0)] that are close to 0. For small epsilon (\u03b5) values, more significant digits can be retained by using the FYL2XP1 instruction than by using (\u03b5+1) as an argument to the FYL2X instruction. The (\u03b5+1) expression is commonly found in compound interest and annuity calculations. The result can be simply converted into a value in another logarithm base by including a scale factor in the ST(1) source operand. The following equation is used to calculate the scale factor for a particular logarithm base, where n is the logarithm base desired for the result of the FYL2XP1 instruction:</p><p>scale factor := log<sub>n</sub> 2</p>",
                 "tooltip": "Computes (ST(1) \u2217 log2(ST(0) + 1.0)), stores the result in register ST(1), and pops the FPU register stack. The source operand in ST(0) must be in the range",
                 "url": "http://www.felixcloutier.com/x86/FYL2XP1.html"
             };
 
+        case "VGF2P8AFFINEINVQB":
+            return {
+                "html": "<p>The AFFINEINVB instruction computes an affine transformation in the Galois Field 2<sup>8</sup>. For this instruction, an affine transformation is defined by A * inv(x) + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. The inverse of the bytes in x is defined with respect to the reduction polynomial x<sup>8</sup> + x<sup>4</sup> + x<sup>3</sup> + x + 1.</p><p>One SIMD register (operand 1) holds \u201cx\u201d as either 16, 32 or 64 8-bit vectors. A second SIMD (operand 2) register or memory operand contains 2, 4, or 8 \u201cA\u201d values, which are operated upon by the correspondingly aligned 8 \u201cx\u201d values in the first register. The \u201cb\u201d vector is constant for all calculations and contained in the immediate byte.</p><p>The EVEX encoded form of this instruction does not support memory fault suppression. The SSE encoded forms of the instruction require 16B alignment on their memory operations.</p><p>The inverse of each byte is given by the following table. The upper nibble is on the vertical axis and the lower nibble is on the horizontal axis. For example, the inverse of 0x95 is 0x8A.</p>",
+                "tooltip": "The AFFINEINVB instruction computes an affine transformation in the Galois Field 28. For this instruction, an affine transformation is defined by A * inv(x) + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. The inverse of the bytes in x is defined with respect to the reduction polynomial x8 + x4 + x3 + x + 1.",
+                "url": "http://www.felixcloutier.com/x86/GF2P8AFFINEINVQB.html"
+            };
+
+        case "VGF2P8AFFINEQB":
+            return {
+                "html": "<p>The AFFINEB instruction computes an affine transformation in the Galois Field 2<sup>8</sup>. For this instruction, an affine transformation is defined by A * x + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. One SIMD register (operand 1) holds \u201cx\u201d as either 16, 32 or 64 8-bit vectors. A second SIMD (operand 2) register or memory operand contains 2, 4, or 8 \u201cA\u201d values, which are operated upon by the correspondingly aligned 8 \u201cx\u201d values in the first register. The \u201cb\u201d vector is constant for all calculations and contained in the immediate byte.</p><p>The EVEX encoded form of this instruction does not support memory fault suppression. The SSE encoded forms of the instruction require16B alignment on their memory operations.</p>",
+                "tooltip": "The AFFINEB instruction computes an affine transformation in the Galois Field 28. For this instruction, an affine transformation is defined by A * x + b where \u201cA\u201d is an 8 by 8 bit matrix, and \u201cx\u201d and \u201cb\u201d are 8-bit vectors. One SIMD register (operand 1) holds \u201cx\u201d as either 16, 32 or 64 8-bit vectors. A second SIMD (operand 2) register or memory operand contains 2, 4, or 8 \u201cA\u201d values, which are operated upon by the correspondingly aligned 8 \u201cx\u201d values in the first register. The \u201cb\u201d vector is constant for all calculations and contained in the immediate byte.",
+                "url": "http://www.felixcloutier.com/x86/GF2P8AFFINEQB.html"
+            };
+
+        case "VGF2P8MULB":
+            return {
+                "html": "<p>The instruction multiplies elements in the finite field GF(2<sup>8</sup>), operating on a byte (field element) in the first source operand and the corresponding byte in a second source operand. The field GF(2<sup>8</sup>) is represented in polynomial representation with the reduction polynomial x<sup>8</sup> + x<sup>4</sup> + x<sup>3</sup> + x + 1.</p><p>This instruction does not support broadcasting.</p><p>The EVEX encoded form of this instruction supports memory fault suppression. The SSE encoded forms of the instruction require16B alignment on their memory operations.</p>",
+                "tooltip": "The instruction multiplies elements in the finite field GF(28), operating on a byte (field element) in the first source operand and the corresponding byte in a second source operand. The field GF(28) is represented in polynomial representation with the reduction polynomial x8 + x4 + x3 + x + 1.",
+                "url": "http://www.felixcloutier.com/x86/GF2P8MULB.html"
+            };
+
         case "HADDPD":
         case "VHADDPD":
             return {
@@ -1293,6 +1210,13 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/HLT.html"
             };
 
+        case "HRESET":
+            return {
+                "html": "<p>Requests the processor to selectively reset selected components of hardware history maintained by the current logical processor. HRESET operation is controlled by the implicit EAX operand. The value of the explicit imm8 operand is ignored. This instruction can only be executed at privilege level 0.</p><p>The HRESET instruction can be used to request reset of multiple components of hardware history. Prior to the execution of HRESET, the system software must take the following steps:</p><p>1. Enumerate the HRESET capabilities via CPUID.20H.0H:EBX, which indicates what components of hardware history can be reset.</p><p>2. Only the bits enumerated by CPUID.20H.0H:EBX can be set in the IA32_HRESET_ENABLE MSR.</p><p>HRESET causes a general-protection exception (#GP) if EAX sets any bits that are not set in the IA32_HRESET_ENABLE MSR.</p>",
+                "tooltip": "Requests the processor to selectively reset selected components of hardware history maintained by the current logical processor. HRESET operation is controlled by the implicit EAX operand. The value of the explicit imm8 operand is ignored. This instruction can only be executed at privilege level 0.",
+                "url": "http://www.felixcloutier.com/x86/HRESET.html"
+            };
+
         case "HSUBPD":
         case "VHSUBPD":
             return {
@@ -1325,7 +1249,7 @@ export function getAsmOpcode(opcode) {
 
         case "IN":
             return {
-                "html": "<p>Copies the value from the I/O port specified with the second operand (source operand) to the destination operand (first operand). The source operand can be a byte-immediate or the DX register; the destination operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively). Using the DX register as a source operand allows I/O port addresses from 0 to 65,535 to be accessed; using a byte immediate allows I/O port addresses 0 to 255 to be accessed.</p><p>When accessing an 8-bit I/O port, the opcode determines the port size; when accessing a 16- and 32-bit I/O port, the operand-size attribute determines the port size. At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.</p><p>This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 18, \u201cInput/Output,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for more information on accessing I/O ports in the I/O address space.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
+                "html": "<p>Copies the value from the I/O port specified with the second operand (source operand) to the destination operand (first operand). The source operand can be a byte-immediate or the DX register; the destination operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively). Using the DX register as a source operand allows I/O port addresses from 0 to 65,535 to be accessed; using a byte immediate allows I/O port addresses 0 to 255 to be accessed.</p><p>When accessing an 8-bit I/O port, the opcode determines the port size; when accessing a 16- and 32-bit I/O port, the operand-size attribute determines the port size. At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.</p><p>This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 19, \u201cInput/Output,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for more information on accessing I/O ports in the I/O address space.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
                 "tooltip": "Copies the value from the I/O port specified with the second operand (source operand) to the destination operand (first operand). The source operand can be a byte-immediate or the DX register; the destination operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively). Using the DX register as a source operand allows I/O port addresses from 0 to 65,535 to be accessed; using a byte immediate allows I/O port addresses 0 to 255 to be accessed.",
                 "url": "http://www.felixcloutier.com/x86/IN.html"
             };
@@ -1337,6 +1261,14 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/INC.html"
             };
 
+        case "INCSSPD":
+        case "INCSSPQ":
+            return {
+                "html": "<p>This instruction can be used to increment the current shadow stack pointer by the operand size of the instruction times the unsigned 8-bit value specified by bits 7:0 in the source operand. The instruction performs a pop and discard of the first and last element on the shadow stack in the range specified by the unsigned 8-bit value in bits 7:0 of the source operand.</p>",
+                "tooltip": "This instruction can be used to increment the current shadow stack pointer by the operand size of the instruction times the unsigned 8-bit value specified by bits 7:0 in the source operand. The instruction performs a pop and discard of the first and last element on the shadow stack in the range specified by the unsigned 8-bit value in bits 7:0 of the source operand.",
+                "url": "http://www.felixcloutier.com/x86/INCSSPD%3AINCSSPQ.html"
+            };
+
         case "INS":
         case "INSB":
         case "INSD":
@@ -1374,7 +1306,7 @@ export function getAsmOpcode(opcode) {
 
         case "INVPCID":
             return {
-                "html": "<p>Invalidates mappings in the translation lookaside buffers (TLBs) and paging-structure caches based on process-context identifier (PCID). (See Section 4.10, \u201cCaching Translation Information,\u201d in <em>Intel 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A</em>.) Invalidation is based on the INVPCID type specified in the register operand and the INVPCID descriptor specified in the memory operand.</p><p>Outside 64-bit mode, the register operand is always 32 bits, regardless of the value of CS.D. In 64-bit mode the register operand has 64 bits.</p><p>There are four INVPCID types currently defined:</p><p>The INVPCID descriptor comprises 128 bits and consists of a PCID and a linear address as shown in <a href=\"http://www.felixcloutier.com/x86/INVPCID.html#fig-3-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 3-24</a>. For INVPCID type 0, the processor uses the full 64 bits of the linear address even outside 64-bit mode; the linear address is not used for other INVPCID types.</p><p>If CR4.PCIDE = 0, a logical processor does not cache information for any PCID other than 000H. In this case, executions with INVPCID types 0 and 1 are allowed only if the PCID specified in the INVPCID descriptor is 000H; executions with INVPCID types 2 and 3 invalidate mappings only for PCID 000H. Note that CR4.PCIDE must be 0 outside 64-bit mode (see Chapter 4.10.1, \u201cProcess-Context Identifiers (PCIDs)\u201a\u201d of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A</em>).</p>",
+                "html": "<p>Invalidates mappings in the translation lookaside buffers (TLBs) and paging-structure caches based on process-context identifier (PCID). (See Section 4.10, \u201cCaching Translation Information,\u201d in <em>Intel 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A</em>.) Invalidation is based on the INVPCID type specified in the register operand and the INVPCID descriptor specified in the memory operand.</p><p>Outside 64-bit mode, the register operand is always 32 bits, regardless of the value of CS.D. In 64-bit mode the register operand has 64 bits.</p><p>There are four INVPCID types currently defined:</p><p>The INVPCID descriptor comprises 128 bits and consists of a PCID and a linear address as shown in <a href=\"http://www.felixcloutier.com/x86/INVPCID.html#fig-3-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 3-24</a>. For INVPCID type 0, the processor uses the full 64 bits of the linear address even outside 64-bit mode; the linear address is not used for other INVPCID types.</p><p>If CR4.PCIDE = 0, a logical processor does not cache information for any PCID other than 000H. In this case, executions with INVPCID types 0 and 1 are allowed only if the PCID specified in the INVPCID descriptor is 000H; executions with INVPCID types 2 and 3 invalidate mappings only for PCID 000H. Note that CR4.PCIDE must be 0 outside IA-32e mode (see Chapter 4.10.1, \u201cProcess-Context Identifiers (PCIDs)\u201a\u201d of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A</em>).</p>",
                 "tooltip": "Invalidates mappings in the translation lookaside buffers (TLBs) and paging-structure caches based on process-context identifier (PCID). (See Section 4.10, \u201cCaching Translation Information,\u201d in Intel 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.) Invalidation is based on the INVPCID type specified in the register operand and the INVPCID descriptor specified in the memory operand.",
                 "url": "http://www.felixcloutier.com/x86/INVPCID.html"
             };
@@ -1383,9 +1315,9 @@ export function getAsmOpcode(opcode) {
         case "IRETD":
         case "IRETQ":
             return {
-                "html": "<p>Returns program control from an exception or interrupt handler to a program or procedure that was interrupted by an exception, an external interrupt, or a software-generated interrupt. These instructions are also used to perform a return from a nested task. (A nested task is created when a CALL instruction is used to initiate a task switch or when an interrupt or exception causes a task switch to an interrupt or exception handler.) See the section titled \u201cTask Linking\u201d in Chapter 7 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A</em>.</p><p>IRET and IRETD are mnemonics for the same opcode. The IRETD mnemonic (interrupt return double) is intended for use when returning from an interrupt when using the 32-bit operand size; however, most assemblers use the IRET mnemonic interchangeably for both operand sizes.</p><p>In Real-Address Mode, the IRET instruction preforms a far return to the interrupted program or procedure. During this operation, the processor pops the return instruction pointer, return code segment selector, and EFLAGS image from the stack to the EIP, CS, and EFLAGS registers, respectively, and then resumes execution of the interrupted program or procedure.</p><p>In Protected Mode, the action of the IRET instruction depends on the settings of the NT (nested task) and VM flags in the EFLAGS register and the VM flag in the EFLAGS image stored on the current stack. Depending on the setting of these flags, the processor performs the following types of interrupt returns:</p><p>If the NT flag (EFLAGS register) is cleared, the IRET instruction performs a far return from the interrupt procedure, without a task switch. The code segment being returned to must be equally or less privileged than the interrupt handler routine (as indicated by the RPL field of the code segment selector popped from the stack).</p>",
+                "html": "<p>Returns program control from an exception or interrupt handler to a program or procedure that was interrupted by an exception, an external interrupt, or a software-generated interrupt. These instructions are also used to perform a return from a nested task. (A nested task is created when a CALL instruction is used to initiate a task switch or when an interrupt or exception causes a task switch to an interrupt or exception handler.) See the section titled \u201cTask Linking\u201d in Chapter 7 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A</em>.</p><p>IRET and IRETD are mnemonics for the same opcode. The IRETD mnemonic (interrupt return double) is intended for use when returning from an interrupt when using the 32-bit operand size; however, most assemblers use the IRET mnemonic interchangeably for both operand sizes.</p><p>In Real-Address Mode, the IRET instruction performs a far return to the interrupted program or procedure. During this operation, the processor pops the return instruction pointer, return code segment selector, and EFLAGS image from the stack to the EIP, CS, and EFLAGS registers, respectively, and then resumes execution of the interrupted program or procedure.</p><p>In Protected Mode, the action of the IRET instruction depends on the settings of the NT (nested task) and VM flags in the EFLAGS register and the VM flag in the EFLAGS image stored on the current stack. Depending on the setting of these flags, the processor performs the following types of interrupt returns:</p><p>If the NT flag (EFLAGS register) is cleared, the IRET instruction performs a far return from the interrupt procedure, without a task switch. The code segment being returned to must be equally or less privileged than the interrupt handler routine (as indicated by the RPL field of the code segment selector popped from the stack).</p>",
                 "tooltip": "Returns program control from an exception or interrupt handler to a program or procedure that was interrupted by an exception, an external interrupt, or a software-generated interrupt. These instructions are also used to perform a return from a nested task. (A nested task is created when a CALL instruction is used to initiate a task switch or when an interrupt or exception causes a task switch to an interrupt or exception handler.) See the section titled \u201cTask Linking\u201d in Chapter 7 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A.",
-                "url": "http://www.felixcloutier.com/x86/IRET%3AIRETD.html"
+                "url": "http://www.felixcloutier.com/x86/IRET%3AIRETD%3AIRETQ.html"
             };
 
         case "JMP":
@@ -1817,14 +1749,14 @@ export function getAsmOpcode(opcode) {
 
         case "MOVDIR64B":
             return {
-                "html": "<p>Moves 64-bytes as direct-store with 64-byte write atomicity from source memory address to destination memory address. The source operand is a normal memory operand. The destination operand is a memory location specified in a general-purpose register. The register content is interpreted as an offset into ES segment without any segment override. In 64-bit mode, the register operand width is 64-bits (32-bits with 67H prefix). Outside of 64-bit mode, the register width is 32-bits when CS.D=1 (16-bits with 67H prefix), and 16-bits when CS.D=0 (32-bits with 67H prefix). MOVDIR64B requires the destination address to be 64-byte aligned. No alignment restriction is enforced for source operand.</p><p>MOVDIR64B reads 64-bytes from the source memory address and performs a 64-byte direct-store operation to the destination address. The load operation follows normal read ordering based on source address memory-type. The direct-store is implemented by using the write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store.</p><p>Unlike stores with non-temporal hint which allow UC/WP memory-type for destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of destination address memory type (including UC/WP types). Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Because WC protocol used by direct-stores follow weakly-ordered memory consistency model, fencing operation using SFENCE or MFENCE should follow the MOVDIR64B instruction to enforce ordering when needed.</p><p>There is no atomicity guarantee provided for the 64-byte load operation from source address, and processor implementations may use multiple load operations to read the 64-bytes. The 64-byte direct-store issued by MOVDIR64B guarantees 64-byte write-completion atomicity. This means that the data arrives at the destination in a single undivided 64-byte write transaction.</p><p>Availability of the MOVDIR64B instruction is indicated by the presence of the CPUID feature flag MOVDIR64B (bit 28 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A</em>).</p>",
+                "html": "<p>Moves 64-bytes as direct-store with 64-byte write atomicity from source memory address to destination memory address. The source operand is a normal memory operand. The destination operand is a memory location specified in a general-purpose register. The register content is interpreted as an offset into ES segment without any segment override. In 64-bit mode, the register operand width is 64-bits (32-bits with 67H prefix). Outside of 64-bit mode, the register width is 32-bits when CS.D=1 (16-bits with 67H prefix), and 16-bits when CS.D=0 (32-bits with 67H prefix). MOVDIR64B requires the destination address to be 64-byte aligned. No alignment restriction is enforced for source operand.</p><p>MOVDIR64B first reads 64-bytes from the source memory address. It then performs a 64-byte direct-store operation to the destination address. The load operation follows normal read ordering based on source address memory-type. The direct-store is implemented by using the write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store.</p><p>Unlike stores with non-temporal hint which allow UC/WP memory-type for destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of destination address memory type (including UC/WP types). Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Direct stores are weakly ordered relative to other stores. Software that desires stronger ordering should use a fencing instruction (MFENCE or SFENCE) before or after a direct store to enforce the ordering desired.</p><p>There is no atomicity guarantee provided for the 64-byte load operation from source address, and processor implementations may use multiple load operations to read the 64-bytes. The 64-byte direct-store issued by MOVDIR64B guarantees 64-byte write-completion atomicity. This means that the data arrives at the destination in a single undivided 64-byte write transaction.</p><p>Availability of the MOVDIR64B instruction is indicated by the presence of the CPUID feature flag MOVDIR64B (bit 28 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A</em>).</p>",
                 "tooltip": "Moves 64-bytes as direct-store with 64-byte write atomicity from source memory address to destination memory address. The source operand is a normal memory operand. The destination operand is a memory location specified in a general-purpose register. The register content is interpreted as an offset into ES segment without any segment override. In 64-bit mode, the register operand width is 64-bits (32-bits with 67H prefix). Outside of 64-bit mode, the register width is 32-bits when CS.D=1 (16-bits with 67H prefix), and 16-bits when CS.D=0 (32-bits with 67H prefix). MOVDIR64B requires the destination address to be 64-byte aligned. No alignment restriction is enforced for source operand.",
                 "url": "http://www.felixcloutier.com/x86/MOVDIR64B.html"
             };
 
         case "MOVDIRI":
             return {
-                "html": "<p>Moves the doubleword integer in the source operand (second operand) to the destination operand (first operand) using a direct-store operation. The source operand is a general purpose register. The destination operand is a 32-bit memory location. In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Use of the REX.R prefix permits access to additional registers (R8-R15). Use of the REX.W prefix promotes operation to 64 bits. See summary chart at the beginning of this section for encoding data and limits.</p><p>The direct-store is implemented by using write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store. Unlike stores with non-temporal hint that allow uncached (UC) and write-protected (WP) memory-type for the destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of the destination address memory type (including UC and WP types).</p><p>Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Because WC protocol used by direct-stores follows a weakly-ordered memory consistency model, a fencing operation using SFENCE or MFENCE should follow the MOVDIRI instruction to enforce ordering when needed.</p><p>Direct-stores issued by MOVDIRI to a destination aligned to a 4-byte boundary (8-byte boundary if used with REX.W prefix) guarantee 4-byte (8-byte with REX.W prefix) write-completion atomicity. This means that the data arrives at the destination in a single undivided 4-byte (or 8-byte) write transaction. If the destination is not aligned for the write size, the direct-stores issued by MOVDIRI are split and arrive at the destination in two parts. Each part of such split direct-store will not merge with younger stores but can arrive at the destination in either order. Availability of the MOVDIRI instruction is indicated by the presence of the CPUID feature flag MOVDIRI (bit 27 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A</em>).</p>",
+                "html": "<p>Moves the doubleword integer in the source operand (second operand) to the destination operand (first operand) using a direct-store operation. The source operand is a general purpose register. The destination operand is a 32-bit memory location. In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Use of the REX.R prefix permits access to additional registers (R8-R15). Use of the REX.W prefix promotes operation to 64 bits. See summary chart at the beginning of this section for encoding data and limits.</p><p>The direct-store is implemented by using write combining (WC) memory type protocol for writing data. Using this protocol, the processor does not write the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. If the destination address is cached, the line is written-back (if modified) and invalidated from the cache, before the direct-store. Unlike stores with non-temporal hint that allow uncached (UC) and write-protected (WP) memory-type for the destination to override the non-temporal hint, direct-stores always follow WC memory type protocol irrespective of the destination address memory type (including UC and WP types).</p><p>Unlike WC stores and stores with non-temporal hint, direct-stores are eligible for immediate eviction from the write-combining buffer, and thus not combined with younger stores (including direct-stores) to the same address. Older WC and non-temporal stores held in the write-combing buffer may be combined with younger direct stores to the same address. Direct stores are weakly ordered relative to other stores. Software that desires stronger ordering should use a fencing instruction (MFENCE or SFENCE) before or after a direct store to enforce the ordering desired.</p><p>Direct-stores issued by MOVDIRI to a destination aligned to a 4-byte boundary (8-byte boundary if used with REX.W prefix) guarantee 4-byte (8-byte with REX.W prefix) write-completion atomicity. This means that the data arrives at the destination in a single undivided 4-byte (or 8-byte) write transaction. If the destination is not aligned for the write size, the direct-stores issued by MOVDIRI are split and arrive at the destination in two parts. Each part of such split direct-store will not merge with younger stores but can arrive at the destination in either order. Availability of the MOVDIRI instruction is indicated by the presence of the CPUID feature flag MOVDIRI (bit 27 of the ECX register in leaf 07H, see \u201cCPUID\u2014CPU Identification\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 2A</em>).</p>",
                 "tooltip": "Moves the doubleword integer in the source operand (second operand) to the destination operand (first operand) using a direct-store operation. The source operand is a general purpose register. The destination operand is a 32-bit memory location. In 64-bit mode, the instruction\u2019s default operation size is 32 bits. Use of the REX.R prefix permits access to additional registers (R8-R15). Use of the REX.W prefix promotes operation to 64 bits. See summary chart at the beginning of this section for encoding data and limits.",
                 "url": "http://www.felixcloutier.com/x86/MOVDIRI.html"
             };
@@ -1926,7 +1858,7 @@ export function getAsmOpcode(opcode) {
         case "MOVNTDQA":
         case "VMOVNTDQA":
             return {
-                "html": "<p>MOVNTDQA loads a double quadword from the source operand (second operand) to the destination operand (first operand) using a non-temporal hint if the memory source is WC (write combining) memory type. For WC memory type, the nontemporal hint may be implemented by loading a temporary internal buffer with the equivalent of an aligned cache line without filling this data to the cache. Any memory-type aliased lines in the cache will be snooped and flushed. Subsequent MOVNTDQA reads to unread portions of the WC cache line will receive data from the temporary internal buffer if data is available. The temporary internal buffer may be flushed by the processor at any time for any reason, for example:</p><p>a mis-speculation condition, and various fault conditions</p><p>The non-temporal hint is implemented by using a write combining (WC) memory type protocol when reading the data from memory. Using this protocol, the processor</p><p>does not read the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. The memory type of the region being read can override the non-temporal hint, if the memory address specified for the non-temporal read is not a WC memory region. Information on non-temporal reads and writes can be found in \u201cCaching of Temporal vs. Non-Temporal Data\u201d in Chapter 10 in the Intel\u00ae 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.</p><p>Because the WC protocol uses a weakly-ordered memory consistency model, a fencing operation implemented with a MFENCE instruction should be used in conjunction with MOVNTDQA instructions if multiple processors might use different memory types for the referenced memory locations or to synchronize reads of a processor with writes by other agents in the system. A processor\u2019s implementation of the streaming load hint does not override the effective memory type, but the implementation of the hint is processor dependent. For example, a processor implementa-</p>",
+                "html": "<p>MOVNTDQA loads a double quadword from the source operand (second operand) to the destination operand (first operand) using a non-temporal hint if the memory source is WC (write combining) memory type. For WC memory type, the nontemporal hint may be implemented by loading a temporary internal buffer with the equivalent of an aligned cache line without filling this data to the cache. Any memory-type aliased lines in the cache will be snooped and flushed. Subsequent MOVNTDQA reads to unread portions of the WC cache line will receive data from the temporary internal buffer if data is available. The temporary internal buffer may be flushed by the processor at any time for any reason, for example:</p><p>a mis-speculation condition, and various fault conditions</p><p>The non-temporal hint is implemented by using a write combining (WC) memory type protocol when reading the data from memory. Using this protocol, the processor does not read the data into the cache hierarchy, nor does it fetch the corresponding cache line from memory into the cache hierarchy. The memory type of the region being read can override the non-temporal hint, if the memory address specified for the non-temporal read is not a WC memory region. Information on non-temporal reads and writes can be found in \u201cCaching of Temporal vs. NonTemporal Data\u201d in Chapter 10 in the Intel\u00ae 64 and IA-32 Architecture Software Developer\u2019s Manual, Volume 3A.</p><p>Because the WC protocol uses a weakly-ordered memory consistency model, a fencing operation implemented with a MFENCE instruction should be used in conjunction with MOVNTDQA instructions if multiple processors might use different memory types for the referenced memory locations or to synchronize reads of a processor with writes by other agents in the system. A processor\u2019s implementation of the streaming load hint does not override the effective memory type, but the implementation of the hint is processor dependent. For example, a processor implementation may choose to ignore the hint and process the instruction as a normal MOVDQA for any memory type. Alter-</p><p>natively, another implementation may optimize cache reads generated by MOVNTDQA on WB memory type to reduce cache evictions.</p>",
                 "tooltip": "MOVNTDQA loads a double quadword from the source operand (second operand) to the destination operand (first operand) using a non-temporal hint if the memory source is WC (write combining) memory type. For WC memory type, the nontemporal hint may be implemented by loading a temporary internal buffer with the equivalent of an aligned cache line without filling this data to the cache. Any memory-type aliased lines in the cache will be snooped and flushed. Subsequent MOVNTDQA reads to unread portions of the WC cache line will receive data from the temporary internal buffer if data is available. The temporary internal buffer may be flushed by the processor at any time for any reason, for example",
                 "url": "http://www.felixcloutier.com/x86/MOVNTDQA.html"
             };
@@ -2141,7 +2073,7 @@ export function getAsmOpcode(opcode) {
 
         case "OUT":
             return {
-                "html": "<p>Copies the value from the second operand (source operand) to the I/O port specified with the destination operand (first operand). The source operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively); the destination operand can be a byte-immediate or the DX register. Using a byte immediate allows I/O port addresses 0 to 255 to be accessed; using the DX register as a source operand allows I/O ports from 0 to 65,535 to be accessed.</p><p>The size of the I/O port being accessed is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.</p><p>At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.</p><p>This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 18, \u201cInput/Output,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for more information on accessing I/O ports in the I/O address space.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
+                "html": "<p>Copies the value from the second operand (source operand) to the I/O port specified with the destination operand (first operand). The source operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively); the destination operand can be a byte-immediate or the DX register. Using a byte immediate allows I/O port addresses 0 to 255 to be accessed; using the DX register as a source operand allows I/O ports from 0 to 65,535 to be accessed.</p><p>The size of the I/O port being accessed is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.</p><p>At the machine code level, I/O instructions are shorter when accessing 8-bit I/O ports. Here, the upper eight bits of the port address will be 0.</p><p>This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 19, \u201cInput/Output,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for more information on accessing I/O ports in the I/O address space.</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
                 "tooltip": "Copies the value from the second operand (source operand) to the I/O port specified with the destination operand (first operand). The source operand can be register AL, AX, or EAX, depending on the size of the port being accessed (8, 16, or 32 bits, respectively); the destination operand can be a byte-immediate or the DX register. Using a byte immediate allows I/O port addresses 0 to 255 to be accessed; using the DX register as a source operand allows I/O ports from 0 to 65,535 to be accessed.",
                 "url": "http://www.felixcloutier.com/x86/OUT.html"
             };
@@ -2151,7 +2083,7 @@ export function getAsmOpcode(opcode) {
         case "OUTSD":
         case "OUTSW":
             return {
-                "html": "<p>Copies data from the source operand (second operand) to the I/O port specified with the destination operand (first operand). The source operand is a memory location, the address of which is read from either the DS:SI, DS:ESI or the RSI registers (depending on the address-size attribute of the instruction, 16, 32 or 64, respectively). (The DS segment may be overridden with a segment override prefix.) The destination operand is an I/O port address (from 0 to 65,535) that is read from the DX register. The size of the I/O port being accessed (that is, the size of the source and destination operands) is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.</p><p>At the assembly-code level, two forms of this instruction are allowed: the \u201cexplicit-operands\u201d form and the \u201cno-operands\u201d form. The explicit-operands form (specified with the OUTS mnemonic) allows the source and destination operands to be specified explicitly. Here, the source operand should be a symbol that indicates the size of the I/O port and the source address, and the destination operand must be DX. This explicit-operands form is provided to allow documentation; however, note that the documentation provided by this form can be misleading. That is, the source operand symbol must specify the correct <strong>type</strong> (size) of the operand (byte, word, or doubleword), but it does not have to specify the correct <strong>location</strong>. The location is always specified by the DS:(E)SI or RSI registers, which must be loaded correctly before the OUTS instruction is executed.</p><p>The no-operands form provides \u201cshort forms\u201d of the byte, word, and doubleword versions of the OUTS instructions. Here also DS:(E)SI is assumed to be the source operand and DX is assumed to be the destination operand. The size of the I/O port is specified with the choice of mnemonic: OUTSB (byte), OUTSW (word), or OUTSD (doubleword).</p><p>After the byte, word, or doubleword is transferred from the memory location to the I/O port, the SI/ESI/RSI register is incremented or decremented automatically according to the setting of the DF flag in the EFLAGS register. (If the DF flag is 0, the (E)SI register is incremented; if the DF flag is 1, the SI/ESI/RSI register is decremented.) The SI/ESI/RSI register is incremented or decremented by 1 for byte operations, by 2 for word operations, and by 4 for doubleword operations.</p><p>The OUTS, OUTSB, OUTSW, and OUTSD instructions can be preceded by the REP prefix for block input of ECX bytes, words, or doublewords. See \u201cREP/REPE/REPZ /REPNE/REPNZ\u2014Repeat String Operation Prefix\u201d in this chapter for a description of the REP prefix. This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 18, \u201cInput/Output,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for more information on accessing I/O ports in the I/O address space.</p>",
+                "html": "<p>Copies data from the source operand (second operand) to the I/O port specified with the destination operand (first operand). The source operand is a memory location, the address of which is read from either the DS:SI, DS:ESI or the RSI registers (depending on the address-size attribute of the instruction, 16, 32 or 64, respectively). (The DS segment may be overridden with a segment override prefix.) The destination operand is an I/O port address (from 0 to 65,535) that is read from the DX register. The size of the I/O port being accessed (that is, the size of the source and destination operands) is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.</p><p>At the assembly-code level, two forms of this instruction are allowed: the \u201cexplicit-operands\u201d form and the \u201cno-operands\u201d form. The explicit-operands form (specified with the OUTS mnemonic) allows the source and destination operands to be specified explicitly. Here, the source operand should be a symbol that indicates the size of the I/O port and the source address, and the destination operand must be DX. This explicit-operands form is provided to allow documentation; however, note that the documentation provided by this form can be misleading. That is, the source operand symbol must specify the correct <strong>type</strong> (size) of the operand (byte, word, or doubleword), but it does not have to specify the correct <strong>location</strong>. The location is always specified by the DS:(E)SI or RSI registers, which must be loaded correctly before the OUTS instruction is executed.</p><p>The no-operands form provides \u201cshort forms\u201d of the byte, word, and doubleword versions of the OUTS instructions. Here also DS:(E)SI is assumed to be the source operand and DX is assumed to be the destination operand. The size of the I/O port is specified with the choice of mnemonic: OUTSB (byte), OUTSW (word), or OUTSD (doubleword).</p><p>After the byte, word, or doubleword is transferred from the memory location to the I/O port, the SI/ESI/RSI register is incremented or decremented automatically according to the setting of the DF flag in the EFLAGS register. (If the DF flag is 0, the (E)SI register is incremented; if the DF flag is 1, the SI/ESI/RSI register is decremented.) The SI/ESI/RSI register is incremented or decremented by 1 for byte operations, by 2 for word operations, and by 4 for doubleword operations.</p><p>The OUTS, OUTSB, OUTSW, and OUTSD instructions can be preceded by the REP prefix for block input of ECX bytes, words, or doublewords. See \u201cREP/REPE/REPZ /REPNE/REPNZ\u2014Repeat String Operation Prefix\u201d in this chapter for a description of the REP prefix. This instruction is only useful for accessing I/O ports located in the processor\u2019s I/O address space. See Chapter 19, \u201cInput/Output,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em>, for more information on accessing I/O ports in the I/O address space.</p>",
                 "tooltip": "Copies data from the source operand (second operand) to the I/O port specified with the destination operand (first operand). The source operand is a memory location, the address of which is read from either the DS:SI, DS:ESI or the RSI registers (depending on the address-size attribute of the instruction, 16, 32 or 64, respectively). (The DS segment may be overridden with a segment override prefix.) The destination operand is an I/O port address (from 0 to 65,535) that is read from the DX register. The size of the I/O port being accessed (that is, the size of the source and destination operands) is determined by the opcode for an 8-bit I/O port or by the operand-size attribute of the instruction for a 16- or 32-bit I/O port.",
                 "url": "http://www.felixcloutier.com/x86/OUTS%3AOUTSB%3AOUTSW%3AOUTSD.html"
             };
@@ -2232,7 +2164,7 @@ export function getAsmOpcode(opcode) {
         case "PALIGNR":
         case "VPALIGNR":
             return {
-                "html": "<p>(V)PALIGNR concatenates the destination operand (the first operand) and the source operand (the second operand) into an intermediate composite, shifts the composite at byte granularity to the right by a constant immediate, and extracts the right-aligned result into the destination. The first and the second operands can be an MMX,</p><p>XMM or a YMM register. The immediate value is considered unsigned. Immediate shift counts larger than the 2L (i.e. 32 for 128-bit operands, or 16 for 64-bit operands) produce a zero result. Both operands can be MMX registers, XMM registers or YMM registers. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.</p><p>In 64-bit mode and not encoded by VEX/EVEX prefix, use the REX prefix to access additional registers.</p><p>128-bit Legacy SSE version: Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>EVEX.512 encoded version: The first source operand is a ZMM register and contains four 16-byte blocks. The second source operand is a ZMM register or a 512-bit memory location containing four 16-byte block. The destination operand is a ZMM register and contain four 16-byte results. The imm8[7:0] is the common shift count</p>",
+                "html": "<p>(V)PALIGNR concatenates the destination operand (the first operand) and the source operand (the second operand) into an intermediate composite, shifts the composite at byte granularity to the right by a constant immediate, and extracts the right-aligned result into the destination. The first and the second operands can be an MMX,</p><p>XMM or a YMM register. The immediate value is considered unsigned. Immediate shift counts larger than the 2L (i.e., 32 for 128-bit operands, or 16 for 64-bit operands) produce a zero result. Both operands can be MMX registers, XMM registers or YMM registers. When the source operand is a 128-bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.</p><p>In 64-bit mode and not encoded by VEX/EVEX prefix, use the REX prefix to access additional registers.</p><p>128-bit Legacy SSE version: Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>EVEX.512 encoded version: The first source operand is a ZMM register and contains four 16-byte blocks. The second source operand is a ZMM register or a 512-bit memory location containing four 16-byte block. The destination operand is a ZMM register and contain four 16-byte results. The imm8[7:0] is the common shift count</p>",
                 "tooltip": "(V)PALIGNR concatenates the destination operand (the first operand) and the source operand (the second operand) into an intermediate composite, shifts the composite at byte granularity to the right by a constant immediate, and extracts the right-aligned result into the destination. The first and the second operands can be an MMX",
                 "url": "http://www.felixcloutier.com/x86/PALIGNR.html"
             };
@@ -2293,7 +2225,7 @@ export function getAsmOpcode(opcode) {
         case "PCLMULQDQ":
         case "VPCLMULQDQ":
             return {
-                "html": "<p>Performs a carry-less multiplication of two quadwords, selected from the first source and second source operand according to the value of the immediate byte. Bits 4 and 0 are used to select which 64-bit half of each operand to use according to <a href=\"http://www.felixcloutier.com/x86/PCLMULQDQ.html#tbl-4-13\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-13</a>, other bits of the immediate byte are ignored.</p><p>The first source operand and the destination operand are the same and must be an XMM register. The second source operand can be an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>Compilers and assemblers may implement the following pseudo-op syntax to simply programming and emit the required encoding for Imm8.</p>",
+                "html": "<p>Performs a carry-less multiplication of two quadwords, selected from the first source and second source operand according to the value of the immediate byte. Bits 4 and 0 are used to select which 64-bit half of each operand to use according to <a href=\"http://www.felixcloutier.com/x86/PCLMULQDQ.html#tbl-4-13\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-13</a>, other bits of the immediate byte are ignored.</p><p>The EVEX encoded form of this instruction does not support memory fault suppression.</p><p>The first source operand and the destination operand are the same and must be a ZMM/YMM/XMM register. The second source operand can be a ZMM/YMM/XMM register or a 512/256/128-bit memory location. Bits (VL_MAX-1:128) of the corresponding YMM destination register remain unchanged.</p><p>Compilers and assemblers may implement the following pseudo-op syntax to simplify programming and emit the required encoding for imm8.</p>",
                 "tooltip": "Performs a carry-less multiplication of two quadwords, selected from the first source and second source operand according to the value of the immediate byte. Bits 4 and 0 are used to select which 64-bit half of each operand to use according to Table 4-13, other bits of the immediate byte are ignored.",
                 "url": "http://www.felixcloutier.com/x86/PCLMULQDQ.html"
             };
@@ -2366,6 +2298,13 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/PCMPISTRM.html"
             };
 
+        case "PCONFIG":
+            return {
+                "html": "<p>PCONFIG allows software to configure certain platform features. PCONFIG supports multiple leaf functions, with a leaf function identified by the value in EAX. The registers RBX, RCX, and RDX may provide input or output information for certain leaves. All leaves write status information to EAX but do not modify RBX, RCX, or RDX unless they are being used as leaf-specific output.</p><p>Each PCONFIG leaf function applies to a specific hardware block called a PCONFIG target, and each PCONFIG target is associated with a numerical <strong>target identifier</strong>. Supported target identifiers are enumerated, along with other PCONFIG capabilities, in the sub-leaves of the PCONFIG-information leaf of CPUID (EAX = 1BH). An attempt to execute an undefined leaf function, or a leaf function that applies to an unsupported target identifier, results in a general-protection exception (#GP). (In the future, the PCONFIG-information leaf of CPUID may enumerate PCONFIG capabilities in addition to the supported target identifiers.)</p><p>Addresses and operands are 32 bits outside 64-bit mode and are 64 bits in 64-bit mode. The value of CS.D does not affect operand size or address size.</p><p><a href=\"http://www.felixcloutier.com/x86/PCONFIG.html#tbl-4-15\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-15</a> shows the leaf encodings for PCONFIG, and <a href=\"http://www.felixcloutier.com/x86/PCONFIG.html#tbl-4-16\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-16</a> shows the leaf register usage for PCONFIG.</p><p>The MKTME_KEY_PROGRAM leaf of PCONFIG pertains to the MKTME<sup>1</sup> target, which has target identifier 1. It is used by software to manage the key associated with a KeyID. The leaf function is invoked by setting the leaf value of 0 in EAX and the address of MKTME_KEY_PROGRAM_STRUCT in RBX. Successful execution of the leaf clears RAX (set to zero) and ZF, CF, PF, AF, OF, and SF are cleared. In case of failure, the failure reason is indicated in RAX with ZF set to 1 and CF, PF, AF, OF, and SF are cleared. The MKTME_KEY_PROGRAM leaf uses the MKTME_KEY_PROGRAM_STRUCT in memory shown in <a href=\"http://www.felixcloutier.com/x86/PCONFIG.html#tbl-4-17\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-17</a>.</p>",
+                "tooltip": "PCONFIG allows software to configure certain platform features. PCONFIG supports multiple leaf functions, with a leaf function identified by the value in EAX. The registers RBX, RCX, and RDX may provide input or output information for certain leaves. All leaves write status information to EAX but do not modify RBX, RCX, or RDX unless they are being used as leaf-specific output.",
+                "url": "http://www.felixcloutier.com/x86/PCONFIG.html"
+            };
+
         case "PDEP":
             return {
                 "html": "<p>PDEP uses a mask in the second source operand (the third operand) to transfer/scatter contiguous low order bits in the first source operand (the second operand) into the destination (the first operand). PDEP takes the low bits from the first source operand and deposit them in the destination operand at the corresponding bit locations that are set in the second source operand (mask). All other bits (bits not set in mask) in destination are set to zero.</p><p>This instruction is not supported in real mode and virtual-8086 mode. The operand size is always 32 bits if not in 64-bit mode. In 64-bit mode operand size 64 requires VEX.W1. VEX.W1 is ignored in non-64-bit modes. An attempt to execute this instruction with VEX.L not equal to 0 will cause #UD.</p>",
@@ -2459,8 +2398,8 @@ export function getAsmOpcode(opcode) {
         case "PINSRW":
         case "VPINSRW":
             return {
-                "html": "<p>Copies a word from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other words in the destination register are left untouched.) The source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The destination operand can be an MMX technology register or an XMM register. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location.</p><p>In 64-bit mode and not encoded with VEX/EVEX, using a REX prefix in the form of REX.R permits this instruction to access additional registers (XMM8-XMM15, R8-15).</p><p>128-bit Legacy SSE version: Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed. VEX.L must be 0, otherwise the instruction will #UD.</p><p>EVEX.128 encoded version: Bits (MAXVL-1:128) of the destination register are zeroed. EVEX.L\u2019L must be 0, otherwise the instruction will #UD.</p>",
-                "tooltip": "Copies a word from the source operand (second operand) and inserts it in the destination operand (first operand) at the location specified with the count operand (third operand). (The other words in the destination register are left untouched.) The source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The destination operand can be an MMX technology register or an XMM register. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location.",
+                "html": "<p>Three operand MMX and SSE instructions:</p><p>Copies a word from the source operand and inserts it in the destination operand at the location specified with the count operand. (The other words in the destination register are left untouched.) The source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The destination operand can be an MMX technology register or an XMM register. The count operand is an 8-bit immediate. When specifying a word location in an MMX technology register, the 2 least-significant bits of the count operand specify the location; for an XMM register, the 3 least-significant bits specify the location.</p><p>Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>Four operand AVX and AVX-512 instructions:</p><p>Combines a word from the first source operand with the second source operand, and inserts it in the destination operand at the location specified with the count operand. The second source operand can be a general-purpose register or a 16-bit memory location. (When the source operand is a general-purpose register, the low word of the register is copied.) The first source and destination operands are XMM registers. The count operand is an 8-bit immediate. When specifying a word location, the 3 least-significant bits specify the location.</p>",
+                "tooltip": "Three operand MMX and SSE instructions",
                 "url": "http://www.felixcloutier.com/x86/PINSRW.html"
             };
 
@@ -2597,7 +2536,7 @@ export function getAsmOpcode(opcode) {
         case "PMULDQ":
         case "VPMULDQ":
             return {
-                "html": "<p>Multiplies packed signed doubleword integers in the even-numbered (zero-based reference) elements of the first source operand with the packed signed doubleword integers in the corresponding elements of the second source operand and stores packed signed quadword results in the destination operand.</p><p>128-bit Legacy SSE version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e. the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation. The first source operand and the destination XMM operand is the same. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register remain unchanged.</p><p>VEX.128 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation.The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register are zeroed.</p><p>VEX.256 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e. the first, 3rd, 5th, 7th doubleword element. For 256-bit memory operands, 256 bits are fetched from memory, but only the four even-numbered doublewords are used in the computation. The first source operand and the destination operand are YMM registers. The second source operand can be a YMM register or 256-bit memory location. Bits (MAXVL-1:256) of the corresponding destination ZMM register are zeroed.</p><p>EVEX encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands. The first source operand is a ZMM/YMM/XMM registers. The second source operand can be an ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination is a ZMM/YMM/XMM register, and updated according to the writemask at 64-bit granularity.</p>",
+                "html": "<p>Multiplies packed signed doubleword integers in the even-numbered (zero-based reference) elements of the first source operand with the packed signed doubleword integers in the corresponding elements of the second source operand and stores packed signed quadword results in the destination operand.</p><p>128-bit Legacy SSE version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation. The first source operand and the destination XMM operand is the same. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register remain unchanged.</p><p>VEX.128 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first (low) and third doubleword element. For 128-bit memory operands, 128 bits are fetched from memory, but only the first and third doublewords are used in the computation.The first source operand and the destination operand are XMM registers. The second source operand can be an XMM register or 128-bit memory location. Bits (MAXVL-1:128) of the corresponding destination register are zeroed.</p><p>VEX.256 encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands, i.e., the first, 3rd, 5th, 7th doubleword element. For 256-bit memory operands, 256 bits are fetched from memory, but only the four even-numbered doublewords are used in the computation. The first source operand and the destination operand are YMM registers. The second source operand can be a YMM register or 256-bit memory location. Bits (MAXVL-1:256) of the corresponding destination ZMM register are zeroed.</p><p>EVEX encoded version: The input signed doubleword integers are taken from the even-numbered elements of the source operands. The first source operand is a ZMM/YMM/XMM registers. The second source operand can be an ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination is a ZMM/YMM/XMM register, and updated according to the writemask at 64-bit granularity.</p>",
                 "tooltip": "Multiplies packed signed doubleword integers in the even-numbered (zero-based reference) elements of the first source operand with the packed signed doubleword integers in the corresponding elements of the second source operand and stores packed signed quadword results in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/PMULDQ.html"
             };
@@ -2677,7 +2616,7 @@ export function getAsmOpcode(opcode) {
         case "POPFD":
         case "POPFQ":
             return {
-                "html": "<p>Pops a doubleword (POPFD) from the top of the stack (if the current operand-size attribute is 32) and stores the value in the EFLAGS register, or pops a word from the top of the stack (if the operand-size attribute is 16) and stores it in the lower 16 bits of the EFLAGS register (that is, the FLAGS register). These instructions reverse the operation of the PUSHF/PUSHFD/PUSHFQ instructions.</p><p>The POPF (pop flags) and POPFD (pop flags double) mnemonics reference the same opcode. The POPF instruction is intended for use when the operand-size attribute is 16; the POPFD instruction is intended for use when the operand-size attribute is 32. Some assemblers may force the operand size to 16 for POPF and to 32 for POPFD. Others may treat the mnemonics as synonyms (POPF/POPFD) and use the setting of the operand-size attribute to determine the size of values to pop from the stack.</p><p>The effect of POPF/POPFD on the EFLAGS register changes, depending on the mode of operation. See <a href=\"http://www.felixcloutier.com/x86/POPF:POPFD:POPFQ.html#tbl-4-15\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-15</a> and the key below for details.</p><p>When operating in protected, compatibility, or 64-bit mode at privilege level 0 (or in real-address mode, the equivalent to privilege level 0), all non-reserved flags in the EFLAGS register except RF<sup>1</sup>, VIP, VIF, and VM may be modified. VIP, VIF and VM remain unaffected.</p><p>When operating in protected, compatibility, or 64-bit mode with a privilege level greater than 0, but less than or equal to IOPL, all flags can be modified except the IOPL field and RF, IF, VIP, VIF, and VM; these remain unaffected. The AC and ID flags can only be modified if the operand-size attribute is 32. The interrupt flag (IF) is altered only when executing at a level at least as privileged as the IOPL. If a POPF/POPFD instruction is executed with insufficient privilege, an exception does not occur but privileged bits do not change.</p>",
+                "html": "<p>Pops a doubleword (POPFD) from the top of the stack (if the current operand-size attribute is 32) and stores the value in the EFLAGS register, or pops a word from the top of the stack (if the operand-size attribute is 16) and stores it in the lower 16 bits of the EFLAGS register (that is, the FLAGS register). These instructions reverse the operation of the PUSHF/PUSHFD/PUSHFQ instructions.</p><p>The POPF (pop flags) and POPFD (pop flags double) mnemonics reference the same opcode. The POPF instruction is intended for use when the operand-size attribute is 16; the POPFD instruction is intended for use when the operand-size attribute is 32. Some assemblers may force the operand size to 16 for POPF and to 32 for POPFD. Others may treat the mnemonics as synonyms (POPF/POPFD) and use the setting of the operand-size attribute to determine the size of values to pop from the stack.</p><p>The effect of POPF/POPFD on the EFLAGS register changes, depending on the mode of operation. See <a href=\"http://www.felixcloutier.com/x86/POPF:POPFD:POPFQ.html#tbl-4-21\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-21</a> and the key below for details.</p><p>When operating in protected, compatibility, or 64-bit mode at privilege level 0 (or in real-address mode, the equivalent to privilege level 0), all non-reserved flags in the EFLAGS register except RF<sup>1</sup>, VIP, VIF, and VM may be modified. VIP, VIF and VM remain unaffected.</p><p>When operating in protected, compatibility, or 64-bit mode with a privilege level greater than 0, but less than or equal to IOPL, all flags can be modified except the IOPL field and RF, IF, VIP, VIF, and VM; these remain unaffected. The AC and ID flags can only be modified if the operand-size attribute is 32. The interrupt flag (IF) is altered only when executing at a level at least as privileged as the IOPL. If a POPF/POPFD instruction is executed with insufficient privilege, an exception does not occur but privileged bits do not change.</p>",
                 "tooltip": "Pops a doubleword (POPFD) from the top of the stack (if the current operand-size attribute is 32) and stores the value in the EFLAGS register, or pops a word from the top of the stack (if the operand-size attribute is 16) and stores it in the lower 16 bits of the EFLAGS register (that is, the FLAGS register). These instructions reverse the operation of the PUSHF/PUSHFD/PUSHFQ instructions.",
                 "url": "http://www.felixcloutier.com/x86/POPF%3APOPFD%3APOPFQ.html"
             };
@@ -2701,7 +2640,7 @@ export function getAsmOpcode(opcode) {
 
         case "PREFETCHWT1":
             return {
-                "html": "<p>Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by an intent to write hint (so that data is brought into \u2018Exclusive\u2019 state via a request for ownership) and a locality hint:</p><p>The source operand is a byte memory location. (The locality hints are encoded into the machine level instruction using bits 3 through 5 of the ModR/M byte. Use of any ModR/M value other than the specified ones will lead to unpredictable behavior.)</p><p>If the line selected is already present in the cache hierarchy at a level closer to the processor, no data movement occurs. Prefetches from uncacheable or WC memory are ignored.</p><p>The PREFETCHh instruction is merely a hint and does not affect program behavior. If executed, this instruction moves data closer to the processor in anticipation of future use.</p><p>The implementation of prefetch locality hints is implementation-dependent, and can be overloaded or ignored by a processor implementation. The amount of data prefetched is also processor implementation-dependent. It will, however, be a minimum of 32 bytes.</p>",
+                "html": "<p>Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by an intent to write hint (so that data is brought into \u2018Exclusive\u2019 state via a request for ownership) and a locality hint:</p><p>The source operand is a byte memory location. (The locality hints are encoded into the machine level instruction using bits 3 through 5 of the ModR/M byte. Use of any ModR/M value other than the specified ones will lead to unpredictable behavior.)</p><p>If the line selected is already present in the cache hierarchy at a level closer to the processor, no data movement occurs. Prefetches from uncacheable or WC memory are ignored.</p><p>The PREFETCHWT1 instruction is merely a hint and does not affect program behavior. If executed, this instruction moves data closer to the processor in anticipation of future use.</p><p>The implementation of prefetch locality hints is implementation-dependent, and can be overloaded or ignored by a processor implementation. The amount of data prefetched is also processor implementation-dependent. It will, however, be a minimum of 32 bytes. Additional details of the implementation-dependent locality hints are described in Section 9.5, \u201cMemory Optimization Using Prefetch\u201d of the Intel\u00ae 64 and IA-32 Architectures Optimization Reference Manual.</p>",
                 "tooltip": "Fetches the line of data from memory that contains the byte specified with the source operand to a location in the cache hierarchy specified by an intent to write hint (so that data is brought into \u2018Exclusive\u2019 state via a request for ownership) and a locality hint",
                 "url": "http://www.felixcloutier.com/x86/PREFETCHWT1.html"
             };
@@ -2735,7 +2674,7 @@ export function getAsmOpcode(opcode) {
         case "PSHUFD":
         case "VPSHUFD":
             return {
-                "html": "<p>Copies doublewords from source operand (second operand) and inserts them in the destination operand (first operand) at the locations selected with the order operand (third operand). <span class=\"not-imported\">Figure 4-16</span> shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Each 2-bit field in the order operand selects the contents of one doubleword location within a 128-bit lane and copy to the target element in the destination operand. For example, bits 0 and 1 of the order operand targets the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in <span class=\"not-imported\">Figure 4-16</span>) determines which doubleword element (from the respective 128-bit lane) of the source operand will be copied to doubleword 0 of the destination operand.</p><p>For 128-bit operation, only the low 128-bit lane are operative. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.</p><p>10B - X2 ORDER Operand 11B-X7 7 6 5 4 3 2 1 0 Operand 11B-X3</p><p>The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.</p><p>In 64-bit mode and not encoded in VEX/EVEX, using REX.R permits this instruction to access XMM8-XMM15.</p>",
+                "html": "<p>Copies doublewords from source operand (second operand) and inserts them in the destination operand (first operand) at the locations selected with the order operand (third operand). <span class=\"not-imported\">Figure 4-16</span> shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Each 2-bit field in the order operand selects the contents of one doubleword location within a 128-bit lane and copy to the target element in the destination operand. For example, bits 0 and 1 of the order operand targets the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in <span class=\"not-imported\">Figure 4-16</span>) determines which doubleword element (from the respective 128-bit lane) of the source operand will be copied to doubleword 0 of the destination operand.</p><p>For 128-bit operation, only the low 128-bit lane are operative. The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.</p><p>10B - X2 ORDER 11B - X7 7 6 5 4 3 2 1 0 Operand 11B - X3 Operand</p><p>The source operand can be an XMM register or a 128-bit memory location. The destination operand is an XMM register. The order operand is an 8-bit immediate. Note that this instruction permits a doubleword in the source operand to be copied to more than one doubleword location in the destination operand.</p><p>In 64-bit mode and not encoded in VEX/EVEX, using REX.R permits this instruction to access XMM8-XMM15.</p>",
                 "tooltip": "Copies doublewords from source operand (second operand) and inserts them in the destination operand (first operand) at the locations selected with the order operand (third operand). Figure 4-16 shows the operation of the 256-bit VPSHUFD instruction and the encoding of the order operand. Each 2-bit field in the order operand selects the contents of one doubleword location within a 128-bit lane and copy to the target element in the destination operand. For example, bits 0 and 1 of the order operand targets the first doubleword element in the low and high 128-bit lane of the destination operand for 256-bit VPSHUFD. The encoded value of bits 1:0 of the order operand (see the field encoding in Figure 4-16) determines which doubleword element (from the respective 128-bit lane) of the source operand will be copied to doubleword 0 of the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/PSHUFD.html"
             };
@@ -2770,7 +2709,7 @@ export function getAsmOpcode(opcode) {
         case "VPSIGND":
         case "VPSIGNW":
             return {
-                "html": "<p>(V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.</p><p>(V)PSIGNB operates on signed bytes. (V)PSIGNW operates on 16-bit signed words. (V)PSIGND operates on signed 32-bit integers. When the source operand is a 128bit memory operand, the operand must be aligned on a 16-byte boundary or a general-protection exception (#GP) will be generated.</p><p>Legacy SSE instructions: Both operands can be MMX registers. In 64-bit mode, use the REX prefix to access additional registers.</p><p>128-bit Legacy SSE version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed. VEX.L must be 0, otherwise instructions will #UD.</p>",
+                "html": "<p>(V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.</p><p>(V)PSIGNB operates on signed bytes. (V)PSIGNW operates on 16-bit signed words. (V)PSIGND operates on signed 32-bit integers.</p><p>Legacy SSE instructions: Both operands can be MMX registers. In 64-bit mode, use the REX prefix to access additional registers.</p><p>128-bit Legacy SSE version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: The first source and destination operands are XMM registers. The second source operand is an XMM register or a 128-bit memory location. Bits (MAXVL-1:128) of the destination YMM register are zeroed. VEX.L must be 0, otherwise instructions will #UD.</p>",
                 "tooltip": "(V)PSIGNB/(V)PSIGNW/(V)PSIGND negates each data element of the destination operand (the first operand) if the signed integer value of the corresponding data element in the source operand (the second operand) is less than zero. If the signed integer value of a data element in the source operand is positive, the corresponding data element in the destination operand is unchanged. If a data element in the source operand is zero, the corresponding data element in the destination operand is set to zero.",
                 "url": "http://www.felixcloutier.com/x86/PSIGNB%3APSIGNW%3APSIGND.html"
             };
@@ -2876,8 +2815,8 @@ export function getAsmOpcode(opcode) {
 
         case "PTWRITE":
             return {
-                "html": "<p>This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C</em>, Section 35.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.</p><p>Note: The instruction will #UD if prefix 66H is used.</p>",
-                "tooltip": "This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, Section 35.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.",
+                "html": "<p>This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C</em>, Section 32.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.</p><p>Note: The instruction will #UD if prefix 66H is used.</p>",
+                "tooltip": "This instruction reads data in the source operand and sends it to the Intel Processor Trace hardware to be encoded in a PTW packet if TriggerEn, ContextEn, FilterEn, and PTWEn are all set to 1. For more details on these values, see Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C, Section 32.2.2, \u201cSoftware Trace Instrumentation with PTWRITE\u201d. The size of data is 64-bit if using REX.W in 64-bit mode, otherwise 32-bits of data are copied from the source operand.",
                 "url": "http://www.felixcloutier.com/x86/PTWRITE.html"
             };
 
@@ -2911,7 +2850,7 @@ export function getAsmOpcode(opcode) {
 
         case "PUSH":
             return {
-                "html": "<p>Decrements the stack pointer and then stores the source operand on the top of the stack. Address and operand sizes are determined and used as follows:</p><p>The address size is used only when referencing a source operand in memory.</p><p>The operand size (16, 32, or 64 bits) determines the amount by which the stack pointer is decremented (2, 4 or 8).</p><p>If the source operand is an immediate of size less than the operand size, a sign-extended value is pushed on the stack. If the source operand is a segment register (16 bits) and the operand size is 64-bits, a zero-extended value is pushed on the stack; if the operand size is 32-bits, either a zero-extended value is pushed on the stack or the segment selector is written on the stack using a 16-bit move. For the last case, all recent Core and Atom processors perform a 16-bit move, leaving the upper portion of the stack location unmodified.</p><p>The PUSH ESP instruction pushes the value of the ESP register as it existed before the instruction was executed. If a PUSH instruction uses a memory operand in which the ESP register is used for computing the operand address, the address of the operand is computed before the ESP register is decremented.</p>",
+                "html": "<p>Decrements the stack pointer and then stores the source operand on the top of the stack. Address and operand sizes are determined and used as follows:</p><p>The address size is used only when referencing a source operand in memory.</p><p>The operand size (16, 32, or 64 bits) determines the amount by which the stack pointer is decremented (2, 4 or 8).</p><p>If the source operand is an immediate of size less than the operand size, a sign-extended value is pushed on the stack. If the source operand is a segment register (16 bits) and the operand size is 64-bits, a zero-extended value is pushed on the stack; if the operand size is 32-bits, either a zero-extended value is pushed on the stack or the segment selector is written on the stack using a 16-bit move. For the last case, all recent Intel Core and Intel Atom processors perform a 16-bit move, leaving the upper portion of the stack location unmodified.</p><p>The stack-address size determines the width of the stack pointer when writing to the stack in memory and when decrementing the stack pointer. (As stated above, the amount by which the stack pointer is decremented is determined by the operand size.)</p>",
                 "tooltip": "Decrements the stack pointer and then stores the source operand on the top of the stack. Address and operand sizes are determined and used as follows",
                 "url": "http://www.felixcloutier.com/x86/PUSH.html"
             };
@@ -2993,8 +2932,8 @@ export function getAsmOpcode(opcode) {
 
         case "RDPMC":
             return {
-                "html": "<p>The EAX register is loaded with the low-order 32 bits. The EDX register is loaded with the supported high-order bits of the counter. The number of high-order bits loaded into EDX is implementation specific on processors that do no support architectural performance monitoring. The width of fixed-function and general-purpose performance counters on processors supporting architectural performance monitoring are reported by CPUID 0AH leaf. See below for the treatment of the EDX register for \u201cfast\u201d reads.</p><p>The ECX register specifies the counter type (if the processor supports architectural performance monitoring) and counter index. Counter type is specified in ECX[30] to select one of two type of performance counters. If the processor does not support architectural performance monitoring, ECX[30:0] specifies the counter index; otherwise ECX[29:0] specifies the index relative to the base of each counter type. ECX[31] selects \u201cfast\u201d read mode if supported. The two counter types are:</p><p>The width of fixed-function performance counters and general-purpose performance counters on processors supporting architectural performance monitoring are reported by CPUID 0AH leaf. The width of general-purpose performance counters are 40-bits for processors that do not support architectural performance monitoring counters. The width of special-purpose performance counters are implementation specific.</p><p>When in protected or virtual 8086 mode, the performance-monitoring counters enabled (PCE) flag in register CR4 restricts the use of the RDPMC instruction as follows. When the PCE flag is set, the RDPMC instruction can be executed at any privilege level; when the flag is clear, the instruction can only be executed at privilege level 0. (When in real-address mode, the RDPMC instruction is always enabled.)</p><p>The performance-monitoring counters can also be read with the RDMSR instruction, when executing at privilege level 0.</p>",
-                "tooltip": "The EAX register is loaded with the low-order 32 bits. The EDX register is loaded with the supported high-order bits of the counter. The number of high-order bits loaded into EDX is implementation specific on processors that do no support architectural performance monitoring. The width of fixed-function and general-purpose performance counters on processors supporting architectural performance monitoring are reported by CPUID 0AH leaf. See below for the treatment of the EDX register for \u201cfast\u201d reads.",
+                "html": "<p>Reads the contents of the performance monitoring counter (PMC) specified in ECX register into registers EDX:EAX. (On processors that support the Intel 64 architecture, the high-order 32 bits of RCX are ignored.) The EDX register is loaded with the high-order 32 bits of the PMC and the EAX register is loaded with the low-order 32 bits. (On processors that support the Intel 64 architecture, the high-order 32 bits of each of RAX and RDX are cleared.) If fewer than 64 bits are implemented in the PMC being read, unimplemented bits returned to EDX:EAX will have value zero.</p><p>The width of PMCs on processors supporting architectural performance monitoring (CPUID.0AH:EAX[7:0] =\u0338 0) are reported by CPUID.0AH:EAX[23:16]. On processors that do not support architectural performance monitoring (CPUID.0AH:EAX[7:0]=0), the width of general-purpose performance PMCs is 40 bits, while the widths of special-purpose PMCs are implementation specific.</p><p>Use of ECX to specify a PMC depends on whether the processor supports architectural performance monitoring:</p><p>Specifying an unsupported PMC encoding will cause a general protection exception #GP(0). For PMC details see Chapter 19, \u201cPerformance Monitoring\u201d, in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3B</em>.</p><p>When in protected or virtual 8086 mode, the <strong>Performance-monitoring Counters Enabled</strong> (PCE) flag in register CR4 restricts the use of the RDPMC instruction. When the PCE flag is set, the RDPMC instruction can be executed at any privilege level; when the flag is clear, the instruction can only be executed at privilege level 0. (When in real-address mode, the RDPMC instruction is always enabled.) The PMCs can also be read with the RDMSR instruction, when executing at privilege level 0.</p>",
+                "tooltip": "Reads the contents of the performance monitoring counter (PMC) specified in ECX register into registers EDX:EAX. (On processors that support the Intel 64 architecture, the high-order 32 bits of RCX are ignored.) The EDX register is loaded with the high-order 32 bits of the PMC and the EAX register is loaded with the low-order 32 bits. (On processors that support the Intel 64 architecture, the high-order 32 bits of each of RAX and RDX are cleared.) If fewer than 64 bits are implemented in the PMC being read, unimplemented bits returned to EDX:EAX will have value zero.",
                 "url": "http://www.felixcloutier.com/x86/RDPMC.html"
             };
 
@@ -3016,7 +2955,7 @@ export function getAsmOpcode(opcode) {
         case "REPE":
         case "REPNE":
             return {
-                "html": "<p>Repeats a string instruction the number of times specified in the count register or until the indicated condition of the ZF flag is no longer met. The REP (repeat), REPE (repeat while equal), REPNE (repeat while not equal), REPZ (repeat while zero), and REPNZ (repeat while not zero) mnemonics are prefixes that can be added to one of the string instructions. The REP prefix can be added to the INS, OUTS, MOVS, LODS, and STOS instructions, and the REPE, REPNE, REPZ, and REPNZ prefixes can be added to the CMPS and SCAS instructions. (The REPZ and REPNZ prefixes are synonymous forms of the REPE and REPNE prefixes, respectively.) The F3H prefix is defined for the following instructions and undefined for the rest:</p><p>The REP prefixes apply only to one string instruction at a time. To repeat a block of instructions, use the LOOP instruction or another looping construct. All of these repeat prefixes cause the associated instruction to be repeated until the count in register is decremented to 0. See <a href=\"http://www.felixcloutier.com/x86/REP:REPE:REPZ:REPNE:REPNZ.html#tbl-4-16\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-16</a>.</p><p>The REPE, REPNE, REPZ, and REPNZ prefixes also check the state of the ZF flag after each iteration and terminate the repeat loop if the ZF flag is not in the specified state. When both termination conditions are tested, the cause of a repeat termination can be determined either by testing the count register with a JECXZ instruction or by testing the ZF flag (with a JZ, JNZ, or JNE instruction).</p><p>When the REPE/REPZ and REPNE/REPNZ prefixes are used, the ZF flag does not require initialization because both the CMPS and SCAS instructions affect the ZF flag according to the results of the comparisons they make.</p><p>A repeating string operation can be suspended by an exception or interrupt. When this happens, the state of the registers is preserved to allow the string operation to be resumed upon a return from the exception or interrupt handler. The source and destination registers point to the next string elements to be operated on, the EIP register points to the string instruction, and the ECX register has the value it held following the last successful iteration of the instruction. This mechanism allows long string operations to proceed without affecting the interrupt response time of the system.</p>",
+                "html": "<p>Repeats a string instruction the number of times specified in the count register or until the indicated condition of the ZF flag is no longer met. The REP (repeat), REPE (repeat while equal), REPNE (repeat while not equal), REPZ (repeat while zero), and REPNZ (repeat while not zero) mnemonics are prefixes that can be added to one of the string instructions. The REP prefix can be added to the INS, OUTS, MOVS, LODS, and STOS instructions, and the REPE, REPNE, REPZ, and REPNZ prefixes can be added to the CMPS and SCAS instructions. (The REPZ and REPNZ prefixes are synonymous forms of the REPE and REPNE prefixes, respectively.) The F3H prefix is defined for the following instructions and undefined for the rest:</p><p>The REP prefixes apply only to one string instruction at a time. To repeat a block of instructions, use the LOOP instruction or another looping construct. All of these repeat prefixes cause the associated instruction to be repeated until the count in register is decremented to 0. See <a href=\"http://www.felixcloutier.com/x86/REP:REPE:REPZ:REPNE:REPNZ.html#tbl-4-22\" rel=\"noreferrer noopener\" target=\"_blank\">Table 4-22</a>.</p><p>The REPE, REPNE, REPZ, and REPNZ prefixes also check the state of the ZF flag after each iteration and terminate the repeat loop if the ZF flag is not in the specified state. When both termination conditions are tested, the cause of a repeat termination can be determined either by testing the count register with a JECXZ instruction or by testing the ZF flag (with a JZ, JNZ, or JNE instruction).</p><p>When the REPE/REPZ and REPNE/REPNZ prefixes are used, the ZF flag does not require initialization because both the CMPS and SCAS instructions affect the ZF flag according to the results of the comparisons they make.</p><p>A repeating string operation can be suspended by an exception or interrupt. When this happens, the state of the registers is preserved to allow the string operation to be resumed upon a return from the exception or interrupt handler. The source and destination registers point to the next string elements to be operated on, the EIP register points to the string instruction, and the ECX register has the value it held following the last successful iteration of the instruction. This mechanism allows long string operations to proceed without affecting the interrupt response time of the system.</p>",
                 "tooltip": "Repeats a string instruction the number of times specified in the count register or until the indicated condition of the ZF flag is no longer met. The REP (repeat), REPE (repeat while equal), REPNE (repeat while not equal), REPZ (repeat while zero), and REPNZ (repeat while not zero) mnemonics are prefixes that can be added to one of the string instructions. The REP prefix can be added to the INS, OUTS, MOVS, LODS, and STOS instructions, and the REPE, REPNE, REPZ, and REPNZ prefixes can be added to the CMPS and SCAS instructions. (The REPZ and REPNZ prefixes are synonymous forms of the REPE and REPNE prefixes, respectively.) The F3H prefix is defined for the following instructions and undefined for the rest",
                 "url": "http://www.felixcloutier.com/x86/REP%3AREPE%3AREPZ%3AREPNE%3AREPNZ.html"
             };
@@ -3038,7 +2977,7 @@ export function getAsmOpcode(opcode) {
         case "ROUNDPD":
         case "VROUNDPD":
             return {
-                "html": "<p>Round the 2 double-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double-precision floating-point value.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-17</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.</p><p>VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.</p>",
+                "html": "<p>Round the 2 double-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double-precision floating-point value.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-23</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.</p><p>VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.</p>",
                 "tooltip": "Round the 2 double-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a double-precision floating-point value.",
                 "url": "http://www.felixcloutier.com/x86/ROUNDPD.html"
             };
@@ -3046,7 +2985,7 @@ export function getAsmOpcode(opcode) {
         case "ROUNDPS":
         case "VROUNDPS":
             return {
-                "html": "<p>Round the 4 single-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single-precision floating-point value.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-17</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.</p><p>VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.</p>",
+                "html": "<p>Round the 4 single-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single-precision floating-point value.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-23</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding YMM register destination are unmodified.</p><p>VEX.128 encoded version: the source operand second source operand or a 128-bit memory location. The destination operand is an XMM register. The upper bits (MAXVL-1:128) of the corresponding YMM register destination are zeroed.</p>",
                 "tooltip": "Round the 4 single-precision floating-point values in the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the results in the destination operand (first operand). The rounding process rounds each input floating-point value to an integer value and returns the integer result as a single-precision floating-point value.",
                 "url": "http://www.felixcloutier.com/x86/ROUNDPS.html"
             };
@@ -3054,7 +2993,7 @@ export function getAsmOpcode(opcode) {
         case "ROUNDSD":
         case "VROUNDSD":
             return {
-                "html": "<p>Round the DP FP value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double-precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-17</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:64) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
+                "html": "<p>Round the DP FP value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double-precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-23</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:64) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
                 "tooltip": "Round the DP FP value in the lower qword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a double-precision floating-point input to an integer value and returns the integer result as a double precision floating-point value in the lowest position. The upper double precision floating-point value in the destination is retained.",
                 "url": "http://www.felixcloutier.com/x86/ROUNDSD.html"
             };
@@ -3062,14 +3001,14 @@ export function getAsmOpcode(opcode) {
         case "ROUNDSS":
         case "VROUNDSS":
             return {
-                "html": "<p>Round the single-precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single-precision floating-point input to an integer value and returns the result as a single-precision floating-point value in the lowest position. The upper three single-precision floating-point values in the destination are retained.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-17</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
+                "html": "<p>Round the single-precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single-precision floating-point input to an integer value and returns the result as a single-precision floating-point value in the lowest position. The upper three single-precision floating-point values in the destination are retained.</p><p>The immediate operand specifies control fields for the rounding operation, three bit fields are defined and shown in <a href=\"http://www.felixcloutier.com/x86/ROUNDPD.html#fig-4-24\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 4-24</a>. Bit 3 of the immediate byte controls processor behavior for a precision exception, bit 2 selects the source of rounding mode control. Bits 1:0 specify a non-sticky rounding-mode value (<span class=\"not-imported\">Table 4-23</span> lists the encoded values for rounding-mode field).</p><p>The Precision Floating-Point Exception is signaled according to the immediate operand. If any source operand is an SNaN then it will be converted to a QNaN. If DAZ is set to \u20181 then denormals will be converted to zero before rounding.</p><p>128-bit Legacy SSE version: The first source operand and the destination operand are the same. Bits (MAXVL-1:32) of the corresponding YMM destination register remain unchanged.</p><p>VEX.128 encoded version: Bits (MAXVL-1:128) of the destination YMM register are zeroed.</p>",
                 "tooltip": "Round the single-precision floating-point value in the lowest dword of the source operand (second operand) using the rounding mode specified in the immediate operand (third operand) and place the result in the destination operand (first operand). The rounding process rounds a single-precision floating-point input to an integer value and returns the result as a single-precision floating-point value in the lowest position. The upper three single-precision floating-point values in the destination are retained.",
                 "url": "http://www.felixcloutier.com/x86/ROUNDSS.html"
             };
 
         case "RSM":
             return {
-                "html": "<p>Returns program control from system management mode (SMM) to the application program or operating-system procedure that was interrupted when the processor received an SMM interrupt. The processor\u2019s state is restored from the dump created upon entering SMM. If the processor detects invalid state information during state restoration, it enters the shutdown state. The following invalid information can cause a shutdown:</p><p>The contents of the model-specific registers are not affected by a return from SMM.</p><p>The SMM state map used by RSM supports resuming processor context for non-64-bit modes and 64-bit mode.</p><p>See Chapter 34, \u201cSystem Management Mode,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C</em>, for more information about SMM and the behavior of the RSM instruction.</p>",
+                "html": "<p>Returns program control from system management mode (SMM) to the application program or operating-system procedure that was interrupted when the processor received an SMM interrupt. The processor\u2019s state is restored from the dump created upon entering SMM. If the processor detects invalid state information during state restoration, it enters the shutdown state. The following invalid information can cause a shutdown:</p><p>The contents of the model-specific registers are not affected by a return from SMM.</p><p>The SMM state map used by RSM supports resuming processor context for non-64-bit modes and 64-bit mode.</p><p>See Chapter 31, \u201cSystem Management Mode,\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3C</em>, for more information about SMM and the behavior of the RSM instruction.</p>",
                 "tooltip": "Returns program control from system management mode (SMM) to the application program or operating-system procedure that was interrupted when the processor received an SMM interrupt. The processor\u2019s state is restored from the dump created upon entering SMM. If the processor detects invalid state information during state restoration, it enters the shutdown state. The following invalid information can cause a shutdown",
                 "url": "http://www.felixcloutier.com/x86/RSM.html"
             };
@@ -3134,6 +3073,20 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/SCAS%3ASCASB%3ASCASW%3ASCASD.html"
             };
 
+        case "SERIALIZE":
+            return {
+                "html": "<p>Serializes instruction execution. Before the next instruction is fetched and executed, the SERIALIZE instruction ensures that all modifications to flags, registers, and memory by previous instructions are completed, draining all buffered writes to memory. This instruction is also a serializing instruction as defined in the section \u201cSerializing Instructions\u201d in Chapter 8 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A</em>.</p><p>SERIALIZE does not modify registers, arithmetic flags, or memory.</p>",
+                "tooltip": "Serializes instruction execution. Before the next instruction is fetched and executed, the SERIALIZE instruction ensures that all modifications to flags, registers, and memory by previous instructions are completed, draining all buffered writes to memory. This instruction is also a serializing instruction as defined in the section \u201cSerializing Instructions\u201d in Chapter 8 of the Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A.",
+                "url": "http://www.felixcloutier.com/x86/SERIALIZE.html"
+            };
+
+        case "SETSSBSY":
+            return {
+                "html": "<p>The SETSSBSY instruction verifies the presence of a non-busy supervisor shadow stack token at the address in the IA32_PL0_SSP MSR and marks it busy. Following successful execution of the instruction, the SSP is set to the value of the IA32_PL0_SSP MSR.</p>",
+                "tooltip": "The SETSSBSY instruction verifies the presence of a non-busy supervisor shadow stack token at the address in the IA32_PL0_SSP MSR and marks it busy. Following successful execution of the instruction, the SSP is set to the value of the IA32_PL0_SSP MSR.",
+                "url": "http://www.felixcloutier.com/x86/SETSSBSY.html"
+            };
+
         case "SETA":
         case "SETAE":
         case "SETB":
@@ -3411,7 +3364,7 @@ export function getAsmOpcode(opcode) {
 
         case "SYSCALL":
             return {
-                "html": "<p>SYSCALL invokes an OS system-call handler at privilege level 0. It does so by loading RIP from the IA32_LSTAR MSR (after saving the address of the instruction following SYSCALL into RCX). (The WRMSR instruction ensures that the IA32_LSTAR MSR always contain a canonical address.)</p><p>SYSCALL also saves RFLAGS into R11 and then masks RFLAGS using the IA32_FMASK MSR (MSR address C0000084H); specifically, the processor clears in RFLAGS every bit corresponding to a bit that is set in the IA32_FMASK MSR.</p><p>SYSCALL loads the CS and SS selectors with values derived from bits 47:32 of the IA32_STAR MSR. However, the CS and SS descriptor caches are <strong>not</strong> loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSCALL instruction does not ensure this correspondence.</p><p>The SYSCALL instruction does not save the stack pointer (RSP). If the OS system-call handler will change the stack pointer, it is the responsibility of software to save the previous value of the stack pointer. This might be done prior to executing SYSCALL, with software restoring the stack pointer with the instruction following SYSCALL (which will be executed after SYSRET). Alternatively, the OS system-call handler may save the stack pointer and restore it before executing SYSRET.</p><p><strong>Instruction ordering.</strong> Instructions following a SYSCALL may be fetched from memory before earlier instructions complete execution, but they will not execute (even speculatively) until all instructions prior to the SYSCALL have completed execution (the later instructions may execute before data stored by the earlier instructions have become globally visible).</p>",
+                "html": "<p>SYSCALL invokes an OS system-call handler at privilege level 0. It does so by loading RIP from the IA32_LSTAR MSR (after saving the address of the instruction following SYSCALL into RCX). (The WRMSR instruction ensures that the IA32_LSTAR MSR always contain a canonical address.)</p><p>SYSCALL also saves RFLAGS into R11 and then masks RFLAGS using the IA32_FMASK MSR (MSR address C0000084H); specifically, the processor clears in RFLAGS every bit corresponding to a bit that is set in the IA32_FMASK MSR.</p><p>SYSCALL loads the CS and SS selectors with values derived from bits 47:32 of the IA32_STAR MSR. However, the CS and SS descriptor caches are <strong>not</strong> loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSCALL instruction does not ensure this correspondence.</p><p>The SYSCALL instruction does not save the stack pointer (RSP). If the OS system-call handler will change the stack pointer, it is the responsibility of software to save the previous value of the stack pointer. This might be done prior to executing SYSCALL, with software restoring the stack pointer with the instruction following SYSCALL (which will be executed after SYSRET). Alternatively, the OS system-call handler may save the stack pointer and restore it before executing SYSRET.</p><p>When shadow stacks are enabled at a privilege level where the SYSCALL instruction is invoked, the SSP is saved to the IA32_PL3_SSP MSR. If shadow stacks are enabled at privilege level 0, the SSP is loaded with 0. Refer to Chapter 6, \u201cProcedure Calls, Interrupts, and Exceptions\u201d and Chapter 18, \u201cControl-Flow Enforcement Technology (CET)\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em> for additional CET details.</p>",
                 "tooltip": "SYSCALL invokes an OS system-call handler at privilege level 0. It does so by loading RIP from the IA32_LSTAR MSR (after saving the address of the instruction following SYSCALL into RCX). (The WRMSR instruction ensures that the IA32_LSTAR MSR always contain a canonical address.)",
                 "url": "http://www.felixcloutier.com/x86/SYSCALL.html"
             };
@@ -3432,7 +3385,7 @@ export function getAsmOpcode(opcode) {
 
         case "SYSRET":
             return {
-                "html": "<p>SYSRET is a companion instruction to the SYSCALL instruction. It returns from an OS system-call handler to user code at privilege level 3. It does so by loading RIP from RCX and loading RFLAGS from R11.<sup>1</sup> With a 64-bit operand size, SYSRET remains in 64-bit mode; otherwise, it enters compatibility mode and only the low 32 bits of the registers are loaded.</p><p>SYSRET loads the CS and SS selectors with values derived from bits 63:48 of the IA32_STAR MSR. However, the CS and SS descriptor caches are <strong>not</strong> loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSRET instruction does not ensure this correspondence.</p><p>The SYSRET instruction does not modify the stack pointer (ESP or RSP). For that reason, it is necessary for software to switch to the user stack. The OS may load the user stack pointer (if it was saved after SYSCALL) before executing SYSRET; alternatively, user code may load the stack pointer (if it was saved before SYSCALL) after receiving control from SYSRET.</p><p>If the OS loads the stack pointer before executing SYSRET, it must ensure that the handler of any interrupt or exception delivered between restoring the stack pointer and successful execution of SYSRET is not invoked with the user stack. It can do so using approaches such as the following:</p><p><strong>Instruction ordering.</strong> Instructions following a SYSRET may be fetched from memory before earlier instructions complete execution, but they will not execute (even speculatively) until all instructions prior to the SYSRET have completed execution (the later instructions may execute before data stored by the earlier instructions have become globally visible).</p>",
+                "html": "<p>SYSRET is a companion instruction to the SYSCALL instruction. It returns from an OS system-call handler to user code at privilege level 3. It does so by loading RIP from RCX and loading RFLAGS from R11.<sup>1</sup> With a 64-bit operand size, SYSRET remains in 64-bit mode; otherwise, it enters compatibility mode and only the low 32 bits of the registers are loaded.</p><p>SYSRET loads the CS and SS selectors with values derived from bits 63:48 of the IA32_STAR MSR. However, the CS and SS descriptor caches are <strong>not</strong> loaded from the descriptors (in GDT or LDT) referenced by those selectors. Instead, the descriptor caches are loaded with fixed values. See the Operation section for details. It is the responsibility of OS software to ensure that the descriptors (in GDT or LDT) referenced by those selector values correspond to the fixed values loaded into the descriptor caches; the SYSRET instruction does not ensure this correspondence.</p><p>The SYSRET instruction does not modify the stack pointer (ESP or RSP). For that reason, it is necessary for software to switch to the user stack. The OS may load the user stack pointer (if it was saved after SYSCALL) before executing SYSRET; alternatively, user code may load the stack pointer (if it was saved before SYSCALL) after receiving control from SYSRET.</p><p>If the OS loads the stack pointer before executing SYSRET, it must ensure that the handler of any interrupt or exception delivered between restoring the stack pointer and successful execution of SYSRET is not invoked with the user stack. It can do so using approaches such as the following:</p><p>When shadow stacks are enabled at privilege level 3 the instruction loads SSP with value from IA32_PL3_SSP MSR. Refer to Chapter 6, \u201cProcedure Calls, Interrupts, and Exceptions\u201d and Chapter 18, \u201cControl-Flow Enforcement Technology (CET)\u201d in the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 1</em> for additional CET details.</p>",
                 "tooltip": "SYSRET is a companion instruction to the SYSCALL instruction. It returns from an OS system-call handler to user code at privilege level 3. It does so by loading RIP from RCX and loading RFLAGS from R11.1 With a 64-bit operand size, SYSRET remains in 64-bit mode; otherwise, it enters compatibility mode and only the low 32 bits of the registers are loaded.",
                 "url": "http://www.felixcloutier.com/x86/SYSRET.html"
             };
@@ -3469,7 +3422,7 @@ export function getAsmOpcode(opcode) {
         case "UCOMISS":
         case "VUCOMISS":
             return {
-                "html": "<p>Compares the single-precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).</p><p>Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.</p><p>The UCOMISS instruction differs from the COMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) only if a source operand is an SNaN. The COMISS instruction signals an invalid numeric exception when a source operand is either a QNaN or SNaN.</p><p>The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.</p><p>Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "html": "<p>Compares the single-precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).</p><p>Operand 1 is an XMM register; operand 2 can be an XMM register or a 32 bit memory location.</p><p>The UCOMISS instruction differs from the COMISS instruction in that it signals a SIMD floating-point invalid operation exception (#I) only if a source operand is an SNaN. The COMISS instruction signals an invalid operation exception when a source operand is either a QNaN or SNaN.</p><p>The EFLAGS register is not updated if an unmasked SIMD floating-point exception is generated.</p><p>Note: VEX.vvvv and EVEX.vvvv are reserved and must be 1111b, otherwise instructions will #UD.</p>",
                 "tooltip": "Compares the single-precision floating-point values in the low doublewords of operand 1 (first operand) and operand 2 (second operand), and sets the ZF, PF, and CF flags in the EFLAGS register according to the result (unordered, greater than, less than, or equal). The OF, SF and AF flags in the EFLAGS register are set to 0. The unordered result is returned if either source operand is a NaN (QNaN or SNaN).",
                 "url": "http://www.felixcloutier.com/x86/UCOMISS.html"
             };
@@ -3589,9 +3542,23 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VCOMPRESSPS.html"
             };
 
+        case "VCVTNE2PS2BF16":
+            return {
+                "html": "<p>Converts two SIMD registers of packed single data into a single register of packed BF16 data.</p><p>This instruction does not support memory fault suppression.</p><p>This instruction uses \u201cRound to nearest (even)\u201d rounding mode. Output denormals are always flushed to zero and input denormals are always treated as zero. MXCSR is not consulted nor updated. No floating-point exceptions are generated.</p>",
+                "tooltip": "Converts two SIMD registers of packed single data into a single register of packed BF16 data.",
+                "url": "http://www.felixcloutier.com/x86/VCVTNE2PS2BF16.html"
+            };
+
+        case "VCVTNEPS2BF16":
+            return {
+                "html": "<p>Converts one SIMD register of packed single data into a single register of packed BF16 data.</p><p>This instruction uses \u201cRound to nearest (even)\u201d rounding mode. Output denormals are always flushed to zero and input denormals are always treated as zero. MXCSR is not consulted nor updated.</p><p>As the instruction operand encoding table shows, the EVEX.vvvv field is not used for encoding an operand. EVEX.vvvv is reserved and must be 0b1111 otherwise instructions will #UD.</p>",
+                "tooltip": "Converts one SIMD register of packed single data into a single register of packed BF16 data.",
+                "url": "http://www.felixcloutier.com/x86/VCVTNEPS2BF16.html"
+            };
+
         case "VCVTPD2QQ":
             return {
-                "html": "<p>Converts packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w-1</sup>, where w represents the number of bits in the destination format) is returned.</p><p>EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
+                "html": "<p>Converts packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w</sup>-1, where w represents the number of bits in the destination format) is returned.</p><p>EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
                 "tooltip": "Converts packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).",
                 "url": "http://www.felixcloutier.com/x86/VCVTPD2QQ.html"
             };
@@ -3626,7 +3593,7 @@ export function getAsmOpcode(opcode) {
 
         case "VCVTPS2QQ":
             return {
-                "html": "<p>Converts eight packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w-1</sup>, where w represents the number of bits in the destination format) is returned.</p><p>The source operand is a YMM/XMM/XMM (low 64- bits) register or a 256/128/64-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
+                "html": "<p>Converts eight packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register or the embedded rounding control bits. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w</sup>-1, where w represents the number of bits in the destination format) is returned.</p><p>The source operand is a YMM/XMM/XMM (low 64- bits) register or a 256/128/64-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
                 "tooltip": "Converts eight packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/VCVTPS2QQ.html"
             };
@@ -3675,56 +3642,56 @@ export function getAsmOpcode(opcode) {
 
         case "VCVTTPD2QQ":
             return {
-                "html": "<p>Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w-1</sup>, where w represents the number of bits in the destination format) is returned.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "html": "<p>Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w</sup>-1, where w represents the number of bits in the destination format) is returned.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
                 "tooltip": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed quadword integers in the destination operand (first operand).",
                 "url": "http://www.felixcloutier.com/x86/VCVTTPD2QQ.html"
             };
 
         case "VCVTTPD2UDQ":
             return {
-                "html": "<p>Converts with truncation packed double-precision floating-point values in the source operand (the second operand) to packed unsigned doubleword integers in the destination operand (the first operand).</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location, or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a YMM/XMM/XMM (low 64 bits) register conditionally updated with writemask k1. The upper bits (MAXVL-1:256) of the corresponding destination are zeroed.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "html": "<p>Converts with truncation packed double-precision floating-point values in the source operand (the second operand) to packed unsigned doubleword integers in the destination operand (the first operand).</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location, or a 512/256/128-bit vector broadcasted from a 64-bit memory location. The destination operand is a YMM/XMM/XMM (low 64 bits) register conditionally updated with writemask k1. The upper bits (MAXVL-1:256) of the corresponding destination are zeroed.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
                 "tooltip": "Converts with truncation packed double-precision floating-point values in the source operand (the second operand) to packed unsigned doubleword integers in the destination operand (the first operand).",
                 "url": "http://www.felixcloutier.com/x86/VCVTTPD2UDQ.html"
             };
 
         case "VCVTTPD2UQQ":
             return {
-                "html": "<p>Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed unsigned quadword integers in the destination operand (first operand).</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "html": "<p>Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed unsigned quadword integers in the destination operand (first operand).</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register or a 512/256/128-bit memory location. The destination operation is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
                 "tooltip": "Converts with truncation packed double-precision floating-point values in the source operand (second operand) to packed unsigned quadword integers in the destination operand (first operand).",
                 "url": "http://www.felixcloutier.com/x86/VCVTTPD2UQQ.html"
             };
 
         case "VCVTTPS2QQ":
             return {
-                "html": "<p>Converts with truncation packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w-1</sup>, where w represents the number of bits in the destination format) is returned.</p><p>EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
+                "html": "<p>Converts with truncation packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the indefinite integer value (2<sup>w</sup>-1, where w represents the number of bits in the destination format) is returned.</p><p>EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
                 "tooltip": "Converts with truncation packed single-precision floating-point values in the source operand to eight signed quadword integers in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/VCVTTPS2QQ.html"
             };
 
         case "VCVTTPS2UDQ":
             return {
-                "html": "<p>Converts with truncation packed single-precision floating-point values in the source operand to sixteen unsigned doubleword integers in the destination operand.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
+                "html": "<p>Converts with truncation packed single-precision floating-point values in the source operand to sixteen unsigned doubleword integers in the destination operand.</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX encoded versions: The source operand is a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM/YMM/XMM register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
                 "tooltip": "Converts with truncation packed single-precision floating-point values in the source operand to sixteen unsigned doubleword integers in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/VCVTTPS2UDQ.html"
             };
 
         case "VCVTTPS2UQQ":
             return {
-                "html": "<p>Converts with truncation up to eight packed single-precision floating-point values in the source operand to unsigned quadword integers in the destination operand.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
+                "html": "<p>Converts with truncation up to eight packed single-precision floating-point values in the source operand to unsigned quadword integers in the destination operand.</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX encoded versions: The source operand is a YMM/XMM/XMM (low 64 bits) register or a 256/128/64-bit memory location. The destination operation is a vector register conditionally updated with writemask k1.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
                 "tooltip": "Converts with truncation up to eight packed single-precision floating-point values in the source operand to unsigned quadword integers in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/VCVTTPS2UQQ.html"
             };
 
         case "VCVTTSD2USI":
             return {
-                "html": "<p>Converts with truncation a double-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the double-precision floating-point value is contained in the low quadword of the register.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.</p>",
+                "html": "<p>Converts with truncation a double-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the double-precision floating-point value is contained in the low quadword of the register.</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.</p>",
                 "tooltip": "Converts with truncation a double-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a 64-bit memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the double-precision floating-point value is contained in the low quadword of the register.",
                 "url": "http://www.felixcloutier.com/x86/VCVTTSD2USI.html"
             };
 
         case "VCVTTSS2USI":
             return {
-                "html": "<p>Converts with truncation a single-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.</p><p>When a conversion is inexact, the value returned is rounded according to the rounding control bits in the MXCSR register. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
+                "html": "<p>Converts with truncation a single-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.</p><p>When a conversion is inexact, a truncated (round toward zero) value is returned. If a converted result cannot be represented in the destination format, the floating-point invalid exception is raised, and if this exception is masked, the integer value 2<sup>w</sup> \u2013 1 is returned, where w represents the number of bits in the destination format.</p><p>EVEX.W1 version: promotes the instruction to produce 64-bit data in 64-bit mode.</p><p>Note: EVEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD.</p>",
                 "tooltip": "Converts with truncation a single-precision floating-point value in the source operand (the second operand) to an unsigned doubleword integer (or unsigned quadword integer if operand size is 64 bits) in the destination operand (the first operand). The source operand can be an XMM register or a memory location. The destination operand is a general-purpose register. When the source operand is an XMM register, the single-precision floating-point value is contained in the low doubleword of the register.",
                 "url": "http://www.felixcloutier.com/x86/VCVTTSS2USI.html"
             };
@@ -3778,6 +3745,13 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VDBPSADBW.html"
             };
 
+        case "VDPBF16PS":
+            return {
+                "html": "<p>This instruction performs a SIMD dot-product of two BF16 pairs and accumulates into a packed single precision register.</p><p>\u201cRound to nearest even\u201d rounding mode is used when doing each accumulation of the FMA. Output denormals are always flushed to zero and input denormals are always treated as zero. MXCSR is not consulted nor updated.</p><p>NaN propagation priorities are described in <a href=\"http://www.felixcloutier.com/x86/VDPBF16PS.html#tbl-5-1\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-1</a>.</p>",
+                "tooltip": "This instruction performs a SIMD dot-product of two BF16 pairs and accumulates into a packed single precision register.",
+                "url": "http://www.felixcloutier.com/x86/VDPBF16PS.html"
+            };
+
         case "VERR":
         case "VERW":
             return {
@@ -3836,28 +3810,28 @@ export function getAsmOpcode(opcode) {
 
         case "VFIXUPIMMPD":
             return {
-                "html": "<p>Perform fix-up of quad-word elements encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).</p><p>The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.</p><p>The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p>",
+                "html": "<p>Perform fix-up of quad-word elements encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).</p><p>The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.</p><p>The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p>",
                 "tooltip": "Perform fix-up of quad-word elements encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).",
                 "url": "http://www.felixcloutier.com/x86/VFIXUPIMMPD.html"
             };
 
         case "VFIXUPIMMPS":
             return {
-                "html": "<p>Perform fix-up of doubleword elements encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).</p><p>The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.</p><p>The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPS can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p>",
+                "html": "<p>Perform fix-up of doubleword elements encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).</p><p>The destination and the first source operands are ZMM/YMM/XMM registers. The second source operand can be a ZMM/YMM/XMM register, a 512/256/128-bit memory location or a 512/256/128-bit vector broadcasted from a 64-bit memory location.</p><p>The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPS can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p>",
                 "tooltip": "Perform fix-up of doubleword elements encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the corresponding doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The elements that are fixed-up are selected by mask bits of 1 specified in the opmask k1. Mask bits of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up elements from the first source operand and the preserved element in the first operand are combined as the final results in the destination operand (the first operand).",
                 "url": "http://www.felixcloutier.com/x86/VFIXUPIMMPS.html"
             };
 
         case "VFIXUPIMMSD":
             return {
-                "html": "<p>Perform a fix-up of the low quadword element encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low quadword element of the destination operand (the first operand). Bits 127:64 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 64- bit memory location.</p><p>The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p><p>Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).</p>",
+                "html": "<p>Perform a fix-up of the low quadword element encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low quadword element of the destination operand (the first operand). Bits 127:64 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 64- bit memory location.</p><p>The two-level look-up table perform a fix-up of each DP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p><p>Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).</p>",
                 "tooltip": "Perform a fix-up of the low quadword element encoded in double-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low quadword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low quadword element of the destination operand (the first operand). Bits 127:64 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 64- bit memory location.",
                 "url": "http://www.felixcloutier.com/x86/VFIXUPIMMSD.html"
             };
 
         case "VFIXUPIMMSS":
             return {
-                "html": "<p>Perform a fix-up of the low doubleword element encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low doubleword element of the destination operand (the first operand) Bits 127:32 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 32-bit memory location.</p><p>The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e. INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p><p>Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).</p>",
+                "html": "<p>Perform a fix-up of the low doubleword element encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low doubleword element of the destination operand (the first operand) Bits 127:32 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 32-bit memory location.</p><p>The two-level look-up table perform a fix-up of each SP FP input data in the first source operand by decoding the input data encoding into 8 token types. A response table is defined for each token type that converts the input encoding in the first source operand with one of 16 response actions.</p><p>This instruction is specifically intended for use in fixing up the results of arithmetic calculations involving one source so that they match the spec, although it is generally useful for fixing up the results of multiple-instruction sequences to reflect special-number inputs. For example, consider rcp(0). Input 0 to rcp, and you should get INF according to the DX10 spec. However, evaluating rcp via Newton-Raphson, where x=approx(1/0), yields an incorrect result. To deal with this, VFIXUPIMMPD can be used after the N-R reciprocal sequence to set the result to the correct value (i.e., INF when the input is 0).</p><p>If MXCSR.DAZ is not set, denormal input elements in the first source operand are considered as normal inputs and do not trigger any fixup nor fault reporting.</p><p>Imm8 is used to set the required flags reporting. It supports #ZE and #IE fault reporting (see details below).</p>",
                 "tooltip": "Perform a fix-up of the low doubleword element encoded in single-precision floating-point format in the first source operand (the second operand) using a 32-bit, two-level look-up table specified in the low doubleword element of the second source operand (the third operand) with exception reporting specifier imm8. The element that is fixed-up is selected by mask bit of 1 specified in the opmask k1. Mask bit of 0 in the opmask k1 or table response action of 0000b preserves the corresponding element of the first operand. The fixed-up element from the first source operand or the preserved element in the first operand becomes the low doubleword element of the destination operand (the first operand) Bits 127:32 of the destination operand is copied from the corresponding bits of the first source operand. The destination and first source operands are XMM registers. The second source operand can be a XMM register or a 32-bit memory location.",
                 "url": "http://www.felixcloutier.com/x86/VFIXUPIMMSS.html"
             };
@@ -4122,14 +4096,14 @@ export function getAsmOpcode(opcode) {
 
         case "VGETEXPSD":
             return {
-                "html": "<p>Extracts the biased exponent from the normalized DP FP representation of the low qword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to double-precision FP value and written to the destination operand (the first operand) as DP FP numbers. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand.</p><p>The destination must be a XMM register, the source operand can be a XMM register or a float64 memory location. The low quadword element of the destination operand is conditionally updated with writemask k1.</p><p>Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in <a href=\"http://www.felixcloutier.com/x86/VGETEXPPD.html#tbl-5-14\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-14</a>.</p>",
+                "html": "<p>Extracts the biased exponent from the normalized DP FP representation of the low qword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to double-precision FP value and written to the destination operand (the first operand) as DP FP numbers. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand.</p><p>The destination must be a XMM register, the source operand can be a XMM register or a float64 memory location.</p><p>If writemasking is used, the low quadword element of the destination operand is conditionally updated depending on the value of writemask register k1. If writemasking is not used, the low quadword element of the destination operand is unconditionally updated.</p><p>Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in <a href=\"http://www.felixcloutier.com/x86/VGETEXPPD.html#tbl-5-14\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-14</a>.</p>",
                 "tooltip": "Extracts the biased exponent from the normalized DP FP representation of the low qword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to double-precision FP value and written to the destination operand (the first operand) as DP FP numbers. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand.",
                 "url": "http://www.felixcloutier.com/x86/VGETEXPSD.html"
             };
 
         case "VGETEXPSS":
             return {
-                "html": "<p>Extracts the biased exponent from the normalized SP FP representation of the low doubleword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to single-precision FP value and written to the destination operand (the first operand) as SP FP numbers. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand.</p><p>The destination must be a XMM register, the source operand can be a XMM register or a float32 memory location. The the low doubleword element of the destination operand is conditionally updated with writemask k1.</p><p>Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in <a href=\"http://www.felixcloutier.com/x86/VGETEXPPS.html#tbl-5-15\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-15</a>.</p>",
+                "html": "<p>Extracts the biased exponent from the normalized SP FP representation of the low doubleword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to single-precision FP value and written to the destination operand (the first operand) as SP FP numbers. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand.</p><p>The destination must be a XMM register, the source operand can be a XMM register or a float32 memory location.</p><p>If writemasking is used, the low doubleword element of the destination operand is conditionally updated depending on the value of writemask register k1. If writemasking is not used, the low doubleword element of the destination operand is unconditionally updated.</p><p>Each GETEXP operation converts the exponent value into a FP number (permitting input value in denormal representation). Special cases of input values are listed in <a href=\"http://www.felixcloutier.com/x86/VGETEXPPS.html#tbl-5-15\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-15</a>.</p>",
                 "tooltip": "Extracts the biased exponent from the normalized SP FP representation of the low doubleword data element of the source operand (the third operand) as unbiased signed integer value, or convert the denormal representation of input data to unbiased negative integer values. The integer value of the unbiased exponent is converted to single-precision FP value and written to the destination operand (the first operand) as SP FP numbers. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand.",
                 "url": "http://www.felixcloutier.com/x86/VGETEXPSS.html"
             };
@@ -4192,6 +4166,14 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VMASKMOV.html"
             };
 
+        case "VP2INTERSECTD":
+        case "VP2INTERSECTQ":
+            return {
+                "html": "<p>This instruction writes an even/odd pair of mask registers. The mask register destination indicated in the MODRM.REG field is used to form the basis of the register pair. The low bit of that field is masked off (set to zero) to create the first register of the pair.</p><p>EVEX.aaa and EVEX.z must be zero.</p>",
+                "tooltip": "This instruction writes an even/odd pair of mask registers. The mask register destination indicated in the MODRM.REG field is used to form the basis of the register pair. The low bit of that field is masked off (set to zero) to create the first register of the pair.",
+                "url": "http://www.felixcloutier.com/x86/VP2INTERSECTD%3AVP2INTERSECTQ.html"
+            };
+
         case "VP4DPWSSD":
             return {
                 "html": "<p>This instruction computes 4 sequential register source-block dot-products of two signed word operands with doubleword accumulation; see <a href=\"http://www.felixcloutier.com/x86/VP4DPWSSD.html#fig-7-1\" rel=\"noreferrer noopener\" target=\"_blank\">Figure 7-1</a> below. The memory operand is sequentially selected in each of the four steps.</p><p>In the above box, the notation of \u201c+3\u201d' is used to denote that the instruction accesses 4 source registers based on that operand; sources are consecutive, start in a multiple of 4 boundary, and contain the encoded register operand.</p><p>This instruction supports memory fault suppression. The entire memory operand is loaded if any bit of the lowest 16-bits of the mask is set to 1 or if a \u201cno masking\u201d encoding is used.</p><p>The tuple type Tuple1_4X implies that four 32-bit elements (16 bytes) are referenced by the memory operation portion of this instruction.</p>",
@@ -4284,6 +4266,14 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VPCMPW%3AVPCMPUW.html"
             };
 
+        case "VPCOMPRESSB":
+        case "VPCOMPRESSW":
+            return {
+                "html": "<p>Compress (stores) up to 64 byte values or 32 word values from the source operand (second operand) to the destination operand (first operand), based on the active elements determined by the writemask operand. Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p><p>Moves up to 512 bits of packed byte values from the source operand (second operand) to the destination operand (first operand). This instruction is used to store partial contents of a vector register into a byte vector or single memory location using the active elements in operand writemask.</p><p>Memory destination version: Only the contiguous vector is written to the destination memory location. EVEX.z must be zero.</p><p>Register destination version: If the vector length of the contiguous vector is less than that of the input vector in the source operand, the upper bits of the destination register are unmodified if EVEX.z is not set, otherwise the upper bits are zeroed.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Compress (stores) up to 64 byte values or 32 word values from the source operand (second operand) to the destination operand (first operand), based on the active elements determined by the writemask operand. Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.",
+                "url": "http://www.felixcloutier.com/x86/VPCOMPRESSB%3AVCOMPRESSW.html"
+            };
+
         case "VPCOMPRESSD":
             return {
                 "html": "<p>Compress (store) up to 16/8/4 doubleword integer values from the source operand (second operand) to the destination operand (first operand). The source operand is a ZMM/YMM/XMM register, the destination operand can be a ZMM/YMM/XMM register or a 512/256/128-bit memory location.</p><p>The opmask register k1 selects the active elements (partial vector or possibly non-contiguous if less than 16 active elements) from the source operand to compress into a contiguous vector. The contiguous vector is written to the destination starting from the low element of the destination operand.</p><p>Memory destination version: Only the contiguous vector is written to the destination memory location. EVEX.z must be zero.</p><p>Register destination version: If the vector length of the contiguous vector is less than that of the input vector in the source operand, the upper bits of the destination register are unmodified if EVEX.z is not set, otherwise the upper bits are zeroed.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p>",
@@ -4306,6 +4296,34 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VPCONFLICTD%3AVPCONFLICTQ.html"
             };
 
+        case "VPDPBUSD":
+            return {
+                "html": "<p>Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand.",
+                "url": "http://www.felixcloutier.com/x86/VPDPBUSD.html"
+            };
+
+        case "VPDPBUSDS":
+            return {
+                "html": "<p>Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand. If the intermediate sum overflows a 32b signed number the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Multiplies the individual unsigned bytes of the first source operand by the corresponding signed bytes of the second source operand, producing intermediate signed word results. The word results are then summed and accumulated in the destination dword element size operand. If the intermediate sum overflows a 32b signed number the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.",
+                "url": "http://www.felixcloutier.com/x86/VPDPBUSDS.html"
+            };
+
+        case "VPDPWSSD":
+            return {
+                "html": "<p>Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand.",
+                "url": "http://www.felixcloutier.com/x86/VPDPWSSD.html"
+            };
+
+        case "VPDPWSSDS":
+            return {
+                "html": "<p>Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand. If the intermediate sum overflows a 32b signed number, the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Multiplies the individual signed words of the first source operand by the corresponding signed words of the second source operand, producing intermediate signed, doubleword results. The adjacent doubleword results are then summed and accumulated in the destination operand. If the intermediate sum overflows a 32b signed number, the result is saturated to either 0x7FFF_FFFF for positive numbers of 0x8000_0000 for negative numbers.",
+                "url": "http://www.felixcloutier.com/x86/VPDPWSSDS.html"
+            };
+
         case "VPERM2F128":
             return {
                 "html": "<p>Permute 128 bit floating-point-containing fields from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.</p><p>Imm8[1:0] select the source for the first destination 128-bit field, imm8[5:4] select the source for the second destination field. If imm8[3] is set, the low 128-bit field is zeroed. If imm8[7] is set, the high 128-bit field is zeroed.</p><p>VEX.L must be 1, otherwise the instruction will #UD.</p>",
@@ -4406,6 +4424,14 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VPERMT2W%3AVPERMT2D%3AVPERMT2Q%3AVPERMT2PS%3AVPERMT2PD.html"
             };
 
+        case "VPEXPANDB":
+        case "VPEXPANDW":
+            return {
+                "html": "<p>Expands (loads) up to 64 byte integer values or 32 word integer values from the source operand (memory operand) to the destination operand (register operand), based on the active elements determined by the writemask operand.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p><p>Moves 128, 256 or 512 bits of packed byte integer values from the source operand (memory operand) to the destination operand (register operand). This instruction is used to load from an int8 vector register or memory location while inserting the data into sparse elements of destination vector register using the active elements pointed out by the operand writemask.</p><p>This instruction supports memory fault suppression.</p><p>Note that the compressed displacement assumes a pre-scaling (N) corresponding to the size of one single element instead of the size of the full vector.</p>",
+                "tooltip": "Expands (loads) up to 64 byte integer values or 32 word integer values from the source operand (memory operand) to the destination operand (register operand), based on the active elements determined by the writemask operand.",
+                "url": "http://www.felixcloutier.com/x86/VPEXPANDB%3AVPEXPANDW.html"
+            };
+
         case "VPEXPANDD":
             return {
                 "html": "<p>Expand (load) up to 16 contiguous doubleword integer values of the input vector in the source operand (the second operand) to sparse elements in the destination operand (the first operand), selected by the writemask k1. The destination operand is a ZMM register, the source operand can be a ZMM register or memory location.</p><p>The input vector starts from the lowest element in the source operand. The opmask register k1 selects the destination elements (a partial vector or sparse elements if less than 8 elements) to be replaced by the ascending elements in the input vector. Destination elements not selected by the writemask k1 are either unmodified or zeroed, depending on EVEX.z.</p><p>Note: EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p><p>Note that the compressed displacement assumes a pre-scaling (N) corresponding to the size of one single element instead of the size of the full vector.</p>",
@@ -4547,6 +4573,16 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VPMULTISHIFTQB.html"
             };
 
+        case "VPOPCNTB":
+        case "VPOPCNTD":
+        case "VPOPCNTQ":
+        case "VPOPCNTW":
+            return {
+                "html": "<p>This instruction counts the number of bits set to one in each byte, word, dword or qword element of its source (e.g., zmm2 or memory) and places the results in the destination register (zmm1). This instruction supports memory fault suppression.</p>",
+                "tooltip": "This instruction counts the number of bits set to one in each byte, word, dword or qword element of its source (e.g., zmm2 or memory) and places the results in the destination register (zmm1). This instruction supports memory fault suppression.",
+                "url": "http://www.felixcloutier.com/x86/VPOPCNT.html"
+            };
+
         case "VPROLD":
         case "VPROLQ":
         case "VPROLVD":
@@ -4577,6 +4613,49 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/VPSCATTERDD%3AVPSCATTERDQ%3AVPSCATTERQD%3AVPSCATTERQQ.html"
             };
 
+        case "VPSHLDD":
+        case "VPSHLDQ":
+        case "VPSHLDW":
+            return {
+                "html": "<p>Concatenate packed data, extract result shifted to the left by constant value.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Concatenate packed data, extract result shifted to the left by constant value.",
+                "url": "http://www.felixcloutier.com/x86/VPSHLD.html"
+            };
+
+        case "VPSHLDVD":
+        case "VPSHLDVQ":
+        case "VPSHLDVW":
+            return {
+                "html": "<p>Concatenate packed data, extract result shifted to the left by variable value.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Concatenate packed data, extract result shifted to the left by variable value.",
+                "url": "http://www.felixcloutier.com/x86/VPSHLDV.html"
+            };
+
+        case "VPSHRDD":
+        case "VPSHRDQ":
+        case "VPSHRDW":
+            return {
+                "html": "<p>Concatenate packed data, extract result shifted to the right by constant value.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Concatenate packed data, extract result shifted to the right by constant value.",
+                "url": "http://www.felixcloutier.com/x86/VPSHRD.html"
+            };
+
+        case "VPSHRDVD":
+        case "VPSHRDVQ":
+        case "VPSHRDVW":
+            return {
+                "html": "<p>Concatenate packed data, extract result shifted to the right by variable value.</p><p>This instruction supports memory fault suppression.</p>",
+                "tooltip": "Concatenate packed data, extract result shifted to the right by variable value.",
+                "url": "http://www.felixcloutier.com/x86/VPSHRDV.html"
+            };
+
+        case "VPSHUFBITQMB":
+            return {
+                "html": "<p>The VPSHUFBITQMB instruction performs a bit gather select using second source as control and first source as data. Each bit uses 6 control bits (2nd source operand) to select which data bit is going to be gathered (first source operand). A given bit can only access 64 different bits of data (first 64 destination bits can access first 64 data bits, second 64 destination bits can access second 64 data bits, etc.).</p><p>Control data for each output bit is stored in 8 bit elements of SRC2, but only the 6 least significant bits of each element are used.</p><p>This instruction uses write masking (zeroing only). This instruction supports memory fault suppression.</p><p>The first source operand is a ZMM register. The second source operand is a ZMM register or a memory location. The destination operand is a mask register.</p>",
+                "tooltip": "The VPSHUFBITQMB instruction performs a bit gather select using second source as control and first source as data. Each bit uses 6 control bits (2nd source operand) to select which data bit is going to be gathered (first source operand). A given bit can only access 64 different bits of data (first 64 destination bits can access first 64 data bits, second 64 destination bits can access second 64 data bits, etc.).",
+                "url": "http://www.felixcloutier.com/x86/VPSHUFBITQMB.html"
+            };
+
         case "VPSLLVD":
         case "VPSLLVQ":
         case "VPSLLVW":
@@ -4662,28 +4741,28 @@ export function getAsmOpcode(opcode) {
 
         case "VRCP14PD":
             return {
-                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocals of eight/four/two packed double-precision floating-point values in the source operand (the second operand) and stores the packed double-precision floating-point results in the destination operand. The maximum relative error for this approximation is less than 2<sup>-</sup>14<sub>.</sub></p><p>The source operand can be a ZMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.</p><p>The VRCP14PD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.</p><p>EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p>",
+                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocals of eight/four/two packed double-precision floating-point values in the source operand (the second operand) and stores the packed double-precision floating-point results in the destination operand. The maximum relative error for this approximation is less than 2<sup>-</sup>14<sub>.</sub></p><p>The source operand can be a ZMM register, a 512-bit memory location, or a 512-bit vector broadcasted from a 64-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.</p><p>The VRCP14PD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.</p><p>EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p>",
                 "tooltip": "This instruction performs a SIMD computation of the approximate reciprocals of eight/four/two packed double-precision floating-point values in the source operand (the second operand) and stores the packed double-precision floating-point results in the destination operand. The maximum relative error for this approximation is less than 2-14.",
                 "url": "http://www.felixcloutier.com/x86/VRCP14PD.html"
             };
 
         case "VRCP14PS":
             return {
-                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocals of the packed single-precision floating-point values in the source operand (the second operand) and stores the packed single-precision floating-point results in the destination operand (the first operand). The maximum relative error for this approximation is less than 2<sup>-14</sup>.</p><p>The source operand can be a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.</p><p>The VRCP14PS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.</p><p>EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p>",
+                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocals of the packed single-precision floating-point values in the source operand (the second operand) and stores the packed single-precision floating-point results in the destination operand (the first operand). The maximum relative error for this approximation is less than 2<sup>-14</sup>.</p><p>The source operand can be a ZMM register, a 512-bit memory location or a 512-bit vector broadcasted from a 32-bit memory location. The destination operand is a ZMM register conditionally updated according to the writemask.</p><p>The VRCP14PS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned.</p><p>EVEX.vvvv is reserved and must be 1111b otherwise instructions will #UD.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p>",
                 "tooltip": "This instruction performs a SIMD computation of the approximate reciprocals of the packed single-precision floating-point values in the source operand (the second operand) and stores the packed single-precision floating-point results in the destination operand (the first operand). The maximum relative error for this approximation is less than 2-14.",
                 "url": "http://www.felixcloutier.com/x86/VRCP14PS.html"
             };
 
         case "VRCP14SD":
             return {
-                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocal of the low double-precision floating-point value in the second source operand (the third operand) stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2<sup>-14</sup>. The source operand can be an XMM register or a 64-bit memory location. The destination operand is an XMM register.</p><p>The VRCP14SD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See <a href=\"http://www.felixcloutier.com/x86/VRCP14PD.html#tbl-5-22\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-22</a> for special-case input values.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p><p>A numerically exact implementation of VRCP14xx can be found at:</p>",
+                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocal of the low double-precision floating-point value in the second source operand (the third operand) stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2<sup>-14</sup>. The source operand can be an XMM register or a 64-bit memory location. The destination operand is an XMM register.</p><p>The VRCP14SD instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See <a href=\"http://www.felixcloutier.com/x86/VRCP14PD.html#tbl-5-22\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-22</a> for special-case input values.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p><p>A numerically exact implementation of VRCP14xx can be found at:</p>",
                 "tooltip": "This instruction performs a SIMD computation of the approximate reciprocal of the low double-precision floating-point value in the second source operand (the third operand) stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:64) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 64-bit memory location. The destination operand is an XMM register.",
                 "url": "http://www.felixcloutier.com/x86/VRCP14SD.html"
             };
 
         case "VRCP14SS":
             return {
-                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocal of the low single-precision floating-point value in the second source operand (the third operand) and stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2<sup>-14</sup>. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register.</p><p>The VRCP14SS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e. not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e. correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See <a href=\"http://www.felixcloutier.com/x86/VRCP14PS.html#tbl-5-23\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-23</a> for special-case input values.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p>",
+                "html": "<p>This instruction performs a SIMD computation of the approximate reciprocal of the low single-precision floating-point value in the second source operand (the third operand) and stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2<sup>-14</sup>. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register.</p><p>The VRCP14SS instruction is not affected by the rounding control bits in the MXCSR register. When a source value is a 0.0, an \u221e with the sign of the source value is returned. A denormal source value will be treated as zero only in case of DAZ bit set in MXCSR. Otherwise it is treated correctly (i.e., not as a 0.0). Underflow results are flushed to zero only in case of FTZ bit set in MXCSR. Otherwise it will be treated correctly (i.e., correct underflow result is written) with the sign of the operand. When a source value is a SNaN or QNaN, the SNaN is converted to a QNaN or the source QNaN is returned. See <a href=\"http://www.felixcloutier.com/x86/VRCP14PS.html#tbl-5-23\" rel=\"noreferrer noopener\" target=\"_blank\">Table 5-23</a> for special-case input values.</p><p>MXCSR exception flags are not affected by this instruction and floating-point exceptions are not reported.</p>",
                 "tooltip": "This instruction performs a SIMD computation of the approximate reciprocal of the low single-precision floating-point value in the second source operand (the third operand) and stores the result in the low quadword element of the destination operand (the first operand) according to the writemask k1. Bits (127:32) of the XMM register destination are copied from corresponding bits in the first source operand (the second operand). The maximum relative error for this approximation is less than 2-14. The source operand can be an XMM register or a 32-bit memory location. The destination operand is an XMM register.",
                 "url": "http://www.felixcloutier.com/x86/VRCP14SS.html"
             };
@@ -4830,29 +4909,29 @@ export function getAsmOpcode(opcode) {
 
         case "VSCALEFPD":
             return {
-                "html": "<p>Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying it by 2 power of the double-precision floating-point values in second source operand.</p><p>The equation of this operation is given by:</p><p>zmm1 := zmm2*2<sup>floor(zmm3)</sup>.</p><p>Floor(zmm3) means maximum integer value \u2264 zmm3.</p><p>If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
-                "tooltip": "Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying it by 2 power of the double-precision floating-point values in second source operand.",
+                "html": "<p>Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying them by 2 to the power of the double-precision floating-point values in second source operand.</p><p>The equation of this operation is given by:</p><p>zmm1 := zmm2*2<sup>floor(zmm3)</sup>.</p><p>Floor(zmm3) means maximum integer value \u2264 zmm3.</p><p>If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
+                "tooltip": "Performs a floating-point scale of the packed double-precision floating-point values in the first source operand by multiplying them by 2 to the power of the double-precision floating-point values in second source operand.",
                 "url": "http://www.felixcloutier.com/x86/VSCALEFPD.html"
             };
 
         case "VSCALEFPS":
             return {
-                "html": "<p>Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying it by 2 power of the float32 values in second source operand.</p><p>The equation of this operation is given by:</p><p>zmm1 := zmm2*2<sup>floor(zmm3)</sup>.</p><p>Floor(zmm3) means maximum integer value \u2264 zmm3.</p><p>If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
-                "tooltip": "Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying it by 2 power of the float32 values in second source operand.",
+                "html": "<p>Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying them by 2 to the power of the float32 values in second source operand.</p><p>The equation of this operation is given by:</p><p>zmm1 := zmm2*2<sup>floor(zmm3)</sup>.</p><p>Floor(zmm3) means maximum integer value \u2264 zmm3.</p><p>If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
+                "tooltip": "Performs a floating-point scale of the packed single-precision floating-point values in the first source operand by multiplying them by 2 to the power of the float32 values in second source operand.",
                 "url": "http://www.felixcloutier.com/x86/VSCALEFPS.html"
             };
 
         case "VSCALEFSD":
             return {
-                "html": "<p>Performs a floating-point scale of the packed double-precision floating-point value in the first source operand by multiplying it by 2 power of the double-precision floating-point value in second source operand.</p><p>The equation of this operation is given by:</p><p>xmm1 := xmm2*2<sup>floor(xmm3)</sup>.</p><p>Floor(xmm3) means maximum integer value \u2264 xmm3.</p><p>If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
-                "tooltip": "Performs a floating-point scale of the packed double-precision floating-point value in the first source operand by multiplying it by 2 power of the double-precision floating-point value in second source operand.",
+                "html": "<p>Performs a floating-point scale of the scalar double-precision floating-point value in the first source operand by multiplying it by 2 to the power of the double-precision floating-point value in second source operand.</p><p>The equation of this operation is given by:</p><p>xmm1 := xmm2*2<sup>floor(xmm3)</sup>.</p><p>Floor(xmm3) means maximum integer value \u2264 xmm3.</p><p>If the result cannot be represented in double precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
+                "tooltip": "Performs a floating-point scale of the scalar double-precision floating-point value in the first source operand by multiplying it by 2 to the power of the double-precision floating-point value in second source operand.",
                 "url": "http://www.felixcloutier.com/x86/VSCALEFSD.html"
             };
 
         case "VSCALEFSS":
             return {
-                "html": "<p>Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 power of the float32 value in second source operand.</p><p>The equation of this operation is given by:</p><p>xmm1 := xmm2*2<sup>floor(xmm3)</sup>.</p><p>Floor(xmm3) means maximum integer value \u2264 xmm3.</p><p>If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
-                "tooltip": "Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 power of the float32 value in second source operand.",
+                "html": "<p>Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 to the power of the float32 value in second source operand.</p><p>The equation of this operation is given by:</p><p>xmm1 := xmm2*2<sup>floor(xmm3)</sup>.</p><p>Floor(xmm3) means maximum integer value \u2264 xmm3.</p><p>If the result cannot be represented in single precision, then the proper overflow response (for positive scaling operand), or the proper underflow response (for negative scaling operand) is issued. The overflow and underflow responses are dependent on the rounding mode (for IEEE-compliant rounding), as well as on other settings in MXCSR (exception mask bits, FTZ bit), and on the SAE bit.</p>",
+                "tooltip": "Performs a floating-point scale of the scalar single-precision floating-point value in the first source operand by multiplying it by 2 to the power of the float32 value in second source operand.",
                 "url": "http://www.felixcloutier.com/x86/VSCALEFSS.html"
             };
 
@@ -4906,15 +4985,15 @@ export function getAsmOpcode(opcode) {
 
         case "VZEROALL":
             return {
-                "html": "<p>The instruction zeros contents of all XMM or YMM registers.</p><p>Note: VEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.</p>",
-                "tooltip": "The instruction zeros contents of all XMM or YMM registers.",
+                "html": "<p>In 64-bit mode, the instruction zeroes XMM0-XMM15, YMM0-YMM15, and ZMM0-ZMM15. Outside 64-bit mode, it zeroes only XMM0-XMM7, YMM0-YMM7, and ZMM0-ZMM7. VZEROALL does not modify ZMM16-ZMM31.</p><p>Note: VEX.vvvv is reserved and must be 1111b, otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.</p>",
+                "tooltip": "In 64-bit mode, the instruction zeroes XMM0-XMM15, YMM0-YMM15, and ZMM0-ZMM15. Outside 64-bit mode, it zeroes only XMM0-XMM7, YMM0-YMM7, and ZMM0-ZMM7. VZEROALL does not modify ZMM16-ZMM31.",
                 "url": "http://www.felixcloutier.com/x86/VZEROALL.html"
             };
 
         case "VZEROUPPER":
             return {
-                "html": "<p>The instruction zeros the bits in position 128 and higher of all YMM registers. The lower 128-bits of the registers (the corresponding XMM registers) are unmodified.</p><p>This instruction is recommended when transitioning between AVX and legacy SSE code - it will eliminate performance penalties caused by false dependencies.</p><p>Note: VEX.vvvv is reserved and must be 1111b otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.</p>",
-                "tooltip": "The instruction zeros the bits in position 128 and higher of all YMM registers. The lower 128-bits of the registers (the corresponding XMM registers) are unmodified.",
+                "html": "<p>In 64-bit mode, the instruction zeroes the bits in positions 128 and higher in YMM0-YMM15 and ZMM0-ZMM15. Outside 64-bit mode, it zeroes those bits only in YMM0-YMM7 and ZMM0-ZMM7. VZEROUPPER does not modify the lower 128 bits of these registers and it does not modify ZMM16-ZMM31.</p><p>This instruction is recommended when transitioning between AVX and legacy SSE code; it will eliminate performance penalties caused by false dependencies.</p><p>Note: VEX.vvvv is reserved and must be 1111b otherwise instructions will #UD. In Compatibility and legacy 32-bit mode only the lower 8 registers are modified.</p>",
+                "tooltip": "In 64-bit mode, the instruction zeroes the bits in positions 128 and higher in YMM0-YMM15 and ZMM0-ZMM15. Outside 64-bit mode, it zeroes those bits only in YMM0-YMM7 and ZMM0-ZMM7. VZEROUPPER does not modify the lower 128 bits of these registers and it does not modify ZMM16-ZMM31.",
                 "url": "http://www.felixcloutier.com/x86/VZEROUPPER.html"
             };
 
@@ -4933,6 +5012,13 @@ export function getAsmOpcode(opcode) {
                 "url": "http://www.felixcloutier.com/x86/WBINVD.html"
             };
 
+        case "WBNOINVD":
+            return {
+                "html": "<p>The WBNOINVD instruction writes back all modified cache lines in the processor\u2019s internal cache to main memory but does not invalidate (flush) the internal caches.</p><p>After executing this instruction, the processor does not wait for the external caches to complete their write-back operation before proceeding with instruction execution. It is the responsibility of hardware to respond to the cache write-back signal. The amount of time or cycles for WBNOINVD to complete will vary due to size and other factors of different cache hierarchies. As a consequence, the use of the WBNOINVD instruction can have an impact on logical processor interrupt/event response time.</p><p>The WBNOINVD instruction is a privileged instruction. When the processor is running in protected mode, the CPL of a program or procedure must be 0 to execute this instruction. This instruction is also a serializing instruction (see \u201cSerializing Instructions\u201d in Chapter 8 of the <em>Intel\u00ae 64 and IA-32 Architectures Software Developer\u2019s Manual, Volume 3A</em>).</p><p>This instruction\u2019s operation is the same in non-64-bit modes and 64-bit mode.</p>",
+                "tooltip": "The WBNOINVD instruction writes back all modified cache lines in the processor\u2019s internal cache to main memory but does not invalidate (flush) the internal caches.",
+                "url": "http://www.felixcloutier.com/x86/WBNOINVD.html"
+            };
+
         case "WRFSBASE":
         case "WRGSBASE":
             return {
@@ -4950,7 +5036,7 @@ export function getAsmOpcode(opcode) {
 
         case "WRPKRU":
             return {
-                "html": "<p>Writes the value of EAX into PKRU. ECX and EDX must be 0 when WRPKRU is executed; otherwise, a general-protection exception (#GP) occurs.</p><p>WRPKRU can be executed only if CR4.PKE = 1; otherwise, an invalid-opcode exception (#UD) occurs. Software can discover the value of CR4.PKE by examining CPUID.(EAX=07H,ECX=0H):ECX.OSPKE [bit 4].</p><p>On processors that support the Intel 64 Architecture, the high-order 32-bits of RCX, RDX and RAX are ignored.</p>",
+                "html": "<p>Writes the value of EAX into PKRU. ECX and EDX must be 0 when WRPKRU is executed; otherwise, a general-protection exception (#GP) occurs.</p><p>WRPKRU can be executed only if CR4.PKE = 1; otherwise, an invalid-opcode exception (#UD) occurs. Software can discover the value of CR4.PKE by examining CPUID.(EAX=07H,ECX=0H):ECX.OSPKE [bit 4].</p><p>On processors that support the Intel 64 Architecture, the high-order 32-bits of RCX, RDX and RAX are ignored.</p><p>WRPKRU will never execute speculatively. Memory accesses affected by PKRU register will not execute (even speculatively) until all prior executions of WRPKRU have completed execution and updated the PKRU register.</p>",
                 "tooltip": "Writes the value of EAX into PKRU. ECX and EDX must be 0 when WRPKRU is executed; otherwise, a general-protection exception (#GP) occurs.",
                 "url": "http://www.felixcloutier.com/x86/WRPKRU.html"
             };
@@ -4979,8 +5065,8 @@ export function getAsmOpcode(opcode) {
 
         case "XBEGIN":
             return {
-                "html": "<p>The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort.</p><p>On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution and restores architectural state to that corresponding to the outermost XBEGIN instruction. The fallback address following an abort is computed from the outermost XBEGIN instruction.</p>",
-                "tooltip": "The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort.",
+                "html": "<p>The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort. (Use of the 16-bit operand size does not cause this address to be truncated to 16 bits, unlike a near jump to a relative offset.)</p><p>On an RTM abort, the logical processor discards all architectural register and memory updates performed during the RTM execution and restores architectural state to that corresponding to the outermost XBEGIN instruction. The fallback address following an abort is computed from the outermost XBEGIN instruction.</p>",
+                "tooltip": "The XBEGIN instruction specifies the start of an RTM code region. If the logical processor was not already in transactional execution, then the XBEGIN instruction causes the logical processor to transition into transactional execution. The XBEGIN instruction that transitions the logical processor into transactional execution is referred to as the outermost XBEGIN instruction. The instruction also specifies a relative offset to compute the address of the fallback code path following a transactional abort. (Use of the 16-bit operand size does not cause this address to be truncated to 16 bits, unlike a near jump to a relative offset.)",
                 "url": "http://www.felixcloutier.com/x86/XBEGIN.html"
             };
 
@@ -5023,8 +5109,8 @@ export function getAsmOpcode(opcode) {
         case "VXORPD":
         case "XORPD":
             return {
-                "html": "<p>Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand</p><p>EVEX.512 encoded version: The first source operand is a ZMM register. The second source operand can be a ZMM register or a vector memory location. The destination operand is a ZMM register conditionally updated with writemask k1.</p><p>VEX.256 and EVEX.256 encoded versions: The first source operand is a YMM register. The second source operand is a YMM register or a 256-bit memory location. The destination operand is a YMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 and EVEX.128 encoded versions: The first source operand is an XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding register destination are unmodified.</p>",
-                "tooltip": "Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand",
+                "html": "<p>Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.</p><p>EVEX.512 encoded version: The first source operand is a ZMM register. The second source operand can be a ZMM register or a vector memory location. The destination operand is a ZMM register conditionally updated with writemask k1.</p><p>VEX.256 and EVEX.256 encoded versions: The first source operand is a YMM register. The second source operand is a YMM register or a 256-bit memory location. The destination operand is a YMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:256) of the corresponding ZMM register destination are zeroed.</p><p>VEX.128 and EVEX.128 encoded versions: The first source operand is an XMM register. The second source operand is an XMM register or 128-bit memory location. The destination operand is an XMM register (conditionally updated with writemask k1 in case of EVEX). The upper bits (MAXVL-1:128) of the corresponding ZMM register destination are zeroed.</p><p>128-bit Legacy SSE version: The second source can be an XMM register or an 128-bit memory location. The destination is not distinct from the first source XMM register and the upper bits (MAXVL-1:128) of the corresponding register destination are unmodified.</p>",
+                "tooltip": "Performs a bitwise logical XOR of the two, four or eight packed double-precision floating-point values from the first source operand and the second source operand, and stores the result in the destination operand.",
                 "url": "http://www.felixcloutier.com/x86/XORPD.html"
             };
author	Jeremy <51220084+jeremy-rifkin@users.noreply.github.com>	2022-12-02 12:40:18 -0500
committer	Jeremy <51220084+jeremy-rifkin@users.noreply.github.com>	2022-12-02 12:40:18 -0500
commit	627cbf46b9247611d4841b8d7b2f674e6d9cf3c8 (patch)
tree	60fcbde0260e78c7888b2b110b55fef9cfb8e26a
parent	2e844e9ef535e486bb6d661e984074aec4811577 (diff)
download	compiler-explorer-gh-5218.tar.gz compiler-explorer-gh-5218.zip