/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mpi.h"
#include "prtypes.h"
/*
 * This file implements a single function: s_mpi_getProcessorLineSize();
 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
 * if a cache exists, or zero if there is no cache. If more than one
 * cache line exists, it should return the smallest line size (which is
 * usually the L1 cache).
 *
 * mp_modexp uses this information to make sure that private key information
 * isn't being leaked through the cache.
 *
 * Currently the file returns good data for most modern x86 processors, and
 * reasonable data on 64-bit ppc processors. All other processors are assumed
 * to have a cache line size of 32 bytes.
 */
#ifdefined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) /* X86 processors have special instructions that tell us about the cache */ #include"string.h"
#ifdefined(__GNUC__) void
freebl_cpuid(unsignedlong op, unsignedlong *eax, unsignedlong *ebx, unsignedlong *ecx, unsignedlong *edx)
{ /* Some older processors don't fill the ecx register with cpuid, so clobber it * before calling cpuid, so that there's no risk of picking random bits that * erroneously indicate that absent CPU features are present. * Also, GCC isn't smart enough to save the ebx PIC register on its own * in this case, so do it by hand. Use edi to store ebx and pass the
* value returned in ebx from cpuid through edi. */
__asm__("xor %%ecx, %%ecx\n\t" "mov %%ebx,%%edi\n\t" "cpuid\n\t" "xchgl %%ebx,%%edi\n\t"
: "=a"(*eax), "=D"(*ebx), "=c"(*ecx), "=d"(*edx)
: "0"(op));
}
/* * try flipping a processor flag to determine CPU type
*/ staticunsignedlong
changeFlag(unsignedlong flag)
{ unsignedlong changedFlags, originalFlags;
__asm__("pushfl\n\t"/* get the flags */ "popl %0\n\t" "movl %0,%1\n\t"/* save the original flags */ "xorl %2,%0\n\t"/* flip the bit */ "pushl %0\n\t"/* set the flags */ "popfl\n\t" "pushfl\n\t"/* get the flags again (for return) */ "popl %0\n\t" "pushl %1\n\t"/* restore the original flags */ "popfl\n\t"
: "=r"(changedFlags), "=r"(originalFlags), "=r"(flag)
: "2"(flag)); return changedFlags ^ originalFlags;
}
staticunsignedlong
changeFlag(unsignedlong flag)
{ unsignedlong changedFlags, originalFlags;
__asm {
push eax
push ebx
pushfd /* get the flags */
pop eax
push eax /* save the flags on the stack */
mov originalFlags,eax /* save the original flags */
mov ebx,flag xor eax,ebx /* flip the bit */
push eax /* set the flags */
popfd
pushfd /* get the flags again (for return) */
pop eax
popfd /* restore the original flags */
mov changedFlags,eax
pop ebx
pop eax
} return changedFlags ^ originalFlags;
} #endif
/* * use the above table to determine the CacheEntryLineSize.
*/ staticvoid
getIntelCacheEntryLineSize(unsignedlong val, int *level, unsignedlong *lineSize)
{
CacheType type;
type = CacheMap[val].type; /* only interested in data caches */ /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. * this data check has the side effect of rejecting that entry. If
* that wasn't the case, we could have to reject it explicitly */ if (CacheMap[val].lineSize == 0) { return;
} /* look at the caches, skip types we aren't interested in. * if we already have a value for a lower level cache, skip the
* current entry */ if ((type == Cache_L1) || (type == Cache_L1d)) {
*level = 1;
*lineSize = CacheMap[val].lineSize;
} elseif ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
*level = 2;
*lineSize = CacheMap[val].lineSize;
} elseif ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
*level = 3;
*lineSize = CacheMap[val].lineSize;
} return;
}
/* * returns '0' if no recognized cache is found, or if the cache * information is supported by this processor
*/ staticunsignedlong
getIntelCacheLineSize(int cpuidLevel)
{ int level = 4; unsignedlong lineSize = 0; unsignedlong eax, ebx, ecx, edx; int repeat, count;
if (cpuidLevel < 2) { return 0;
}
/* command '2' of the cpuid is intel's cache info call. Each byte of the * 4 registers contain a potential descriptor for the cache. The CacheMap * table maps the cache entry with the processor cache. Register 'al' * contains a count value that cpuid '2' needs to be called in order to * find all the cache descriptors. Only registers with the high bit set * to 'zero' have valid descriptors. This code loops through all the * required calls to cpuid '2' and passes any valid descriptors it finds * to the getIntelRegisterCacheLineSize code, which breaks the registers * down into their component descriptors. In the end the lineSize of the
* lowest level cache data cache is returned. */
freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
repeat = eax & 0xf; for (count = 0; count < repeat; count++) { if ((eax & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
} if ((ebx & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
} if ((ecx & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
} if ((edx & 0x80000000) == 0) {
getIntelRegisterCacheLineSize(edx, &level, &lineSize);
} if (count + 1 != repeat) {
freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
}
} return lineSize;
}
/* * returns '0' if the cache info is not supported by this processor. * This is based on the AMD extended cache commands for cpuid. * (see "AMD Processor Recognition Application Note" Publication 20734). * Some other processors use the identical scheme. * (see "Processor Recognition, Transmeta Corporation").
*/ staticunsignedlong
getOtherCacheLineSize(unsignedlong cpuidLevel)
{ unsignedlong lineSize = 0; unsignedlong eax, ebx, ecx, edx;
/* get the Extended CPUID level */
freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
cpuidLevel = eax;
if (cpuidLevel >= 0x80000005) {
freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
} return lineSize;
}
#if !defined(AMD_64) if (is386()) { return 0; /* 386 had no cache */
} if (is486()) { return 32; /* really? need more info */
} #endif
/* Pentium, cpuid command is available */
freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
cpuidLevel = eax; /* string holds the CPU's manufacturer ID string - a twelve * character ASCII string stored in ebx, edx, ecx, and * the 32-bit extended feature flags are in edx, ecx.
*/
cpuid[0] = ebx;
cpuid[1] = ecx;
cpuid[2] = edx;
memcpy(string, cpuid, sizeof(cpuid));
string[12] = 0;
manufacturer = MAN_UNKNOWN; for (i = 0; i < n_manufacturers; i++) { if (strcmp(manMap[i], string) == 0) {
manufacturer = i;
}
}
if (manufacturer == INTEL) {
cacheLineSize = getIntelCacheLineSize(cpuidLevel);
} else {
cacheLineSize = getOtherCacheLineSize(cpuidLevel);
} /* doesn't support cache info based on cpuid. This means * an old pentium class processor, which have cache lines of * 32. If we learn differently, we can use a switch based on
* the Manufacturer id */ if (cacheLineSize == 0) {
cacheLineSize = 32;
} return cacheLineSize;
} #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 #endif
#ifdefined(__ppc64__) /* * Sigh, The PPC has some really nice features to help us determine cache * size, since it had lots of direct control functions to do so. The POWER * processor even has an instruction to do this, but it was dropped in * PowerPC. Unfortunately most of them are not available in user mode. * * The dcbz function would be a great way to determine cache line size except * 1) it only works on write-back memory (it throws an exception otherwise), * and 2) because so many mac programs 'knew' the processor cache size was * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep * these programs happy. dcbzl work if 64 bit instructions are supported. * If you know 64 bit instructions are supported, and that stack is * write-back, you can use this code.
*/ #include"memory.h"
/* clear the cache line that contains 'array' */ staticinlinevoid
dcbzl(char *array)
{
__asm__("dcbzl %0, %1"
: /*no result*/
: "b%"(array), "r"(0)
: "memory");
}
/* align the array on a maximum line size boundary, so we
* know we are starting to clear from the first address */
test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); /* set all the values to 1's */
memset(test, 0xff, PPC_MAX_LINE_SIZE); /* clear one cache block starting at 'test' */
dcbzl(test);
/* find the size of the cleared area, that's our block size */ for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) { if (test[i - 1] == 0) { return i;
}
} return 0;
}
/*
 * put other processor and platform specific cache code here
 * return the smallest cache line size in bytes on the processor
 * (usually the L1 cache). If the OS has a call, this would be
 * a great place to put it.
 *
 * If there is no cache, return 0;
 *
 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
 * below aren't compiled.
 */
/* If no way to get the processor cache line size has been defined, assume * it's 32 bytes (most common value, does not significantly impact performance)
*/ #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED unsignedlong
s_mpi_getProcessorLineSize()
{ return 32;
} #endif
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.