/************************ *** Team Kitty, 2019 *** *** Sync *** ***********************/ /* This file contains all of the functions required to handle * the system handover from Syncboot. */ #include static void UpdateSegments(void); static void InstallInterrupt(size_t ISR, size_t Function); static void InstallTrap(size_t ISR, size_t Function); /* The GDT loaded by the kernel to take control from Syncboot. * It contains only the segments required to run in kernelmode. * The first segment is always 0. * The second segment is the Code segment, with exclusive execute permission. * The third segment is the Data segment, with r/w permission. * The forth segment is the Task segment, which is needed by x86_64. * It is static so that it is not stored in EfiLoaderData. */ __attribute__((aligned(64))) static size_t InitialGDT[5] = {0, 0x00AF9A000000FFFF, 0x00CF92000000FFFF, 0x0080890000000067, 0}; __attribute__((aligned(64))) static TSS_64 TSS64 = {0}; /* The IDT loaded by UEFI for handling keyboard input is stored in EfiLoaderData. * We're gonna need to reclaim that memory back, so we need to load our own IDT. * To do that, we can define 256 interrupts for us to use. */ __attribute__((aligned(64))) static IDT_GATE IDTData[256] = {0}; // We might need to allocate pages for the exception stacks. __attribute__((aligned(4096))) static size_t FirstPageTable[512] = {0}; /* Main system handover from UEFI. * Prepares the processor, the screen, and memory. */ void PrepareSystem(FILELOADER_PARAMS* FLOP) { Memory_Info.MemoryMap = FLOP->MemoryMap; Memory_Info.MemoryMapSize = FLOP->MemoryMap_Size; Memory_Info.MemoryMapDescriptorSize = FLOP->MemoryMapDescriptorSize; Memory_Info.MemoryMapDescriptorVersion = FLOP->MemoryMapDescriptorVersion; SetupPrinting(FLOP->GPU_Info->GPUs[0]); /* All print functions are now available. */ printf("ready!"); InstallGDT(); InstallIDT(); beep(); if(SetIdentityMap(FLOP->RTServices) == NULL) { Memory_Info.MemoryMap = FLOP->MemoryMap; } PrepareAVX(); /* Bit 5 of CR0 is Numeric Error, which enables * the internal x87 Floating Point Math error * reporting. */ size_t CR0 = ReadControlRegister(0); // Mask CR0 to only see bit 5 if( !(CR0 & (1 << 5))) { // Preserve CR0, but set bit 5. size_t TempReg = CR0 ^ (1 << 5); // Write it back WriteControlRegister(0, TempReg); // Double check. Some processors can be tricky. TempReg = ReadControlRegister(CR0); if(TempReg == CR0) printf("Error setting CR0.NE\r\n"); } /* Bit 10 of CR4 is OSXMMEXCPT, which enables * SSE instructions. */ size_t CR4 = ReadControlRegister(4); // Mask CR4 to only see bit 10 if( !(CR4 & (1 << 10))) { // Preserve CR4, but set bit 10. size_t TempReg = CR4 ^ (1 << 10); // Write it back WriteControlRegister(4, TempReg); // Double check. Some processors can be tricky. TempReg = ReadControlRegister(4); if(TempReg == CR4) printf("Error setting CR4.OSXMMEXCPT\r\n"); } // Set up memory management InstallMemoryMap(); InstallPaging(); // Clean up UEFI's mess ReclaimEfiBootServicesMemory(); ReclaimEfiLoaderCodeMemory(); // Let Intel ME take over power management PreparePowerManagement(); } /* * Following section is taken from the OSDev Wiki page on PC Speaker. * This is beeped first, before *anything* else. * This way, we know that at least *something* works. */ //Play sound using built in speaker static void play_sound(uint32_t nFrequence) { uint32_t Div; uint8_t tmp; //Set the PIT to the desired frequency Div = 1193180 / nFrequence; WritePort(0x0043, 0xb6, 1); WritePort(0x0042, (uint8_t) (Div), 1); WritePort(0x0042, (uint8_t) (Div >> 8), 1); //And play the sound using the PC speaker tmp = ReadPort(0x0061, 1); if (tmp != (tmp | 3)) { WritePort(0x0061, tmp | 3, 1); } } //make it shutup static void nosound() { uint8_t tmp = ReadPort(0x0061, 1) & 0xFC; WritePort(0x0061, tmp, 1); } //Make a beep void beep() { play_sound(1000); timer_wait(10); nosound(); //set_PIT_2(old_frequency); } void timer_wait(int ticks){ uint64_t FinalTick = time + ticks; while(time < FinalTick); } /* A temporary system for keeping track of system performance. */ size_t ClockTick() { // We will be reading a register, so we need to split it up. size_t RegHigh = 0, RegLow = 0; __asm__ __volatile__("rdtscp" : "=a" (RegLow), "=d" (RegHigh) : : "%rcx"); return (RegHigh << 32 | RegLow); } void PrepareAVX() { size_t RFLAGS = ReadControlRegister('f'); // Bit 21 of EFLAGS is ID, which tells whether the CPUID instruction is supported. size_t ID = RFLAGS ^ (1 << 21); WriteControlRegister('f', ID); ID = ReadControlRegister('f'); if (ID == RFLAGS) { printf("CPUID is not supported.\r\n"); } else { // We're going to be receiving input at these 3 registers size_t RBX = 0, RCX = 0, RDX = 0; __asm__ __volatile__("cpuid" : // Instruction "=c" (RCX), "=d" (RDX) : // Outputs (our results come back through here) "a" (0x01) : // Inputs (to rax) "%rbx"); // (anti-)Clobber list // Passing 0x01 as the "leaf" to CPUID gives us the Processor Feature information. // As part of the instruction, we get output in both RCX and RDX. // Bit 27 of RCX is the OSXSAVE bit - it says whether XSAVE was enabled by the operating system (us) if (RCX & (1 << 27)) { AVXStub(); } else { // If we get here, OSXSAVE is not set, so it is our duty to enable it, so that we can use extended processor features. // To do that though, we need to know if it is even supported. // That information is stored in bit 26 of RCX; xsave. if(RCX & (1 << 26)) { // Okay, XSAVE is supported, so we need to set it. // To do that, we set bit 18 of CR4. size_t CR4 = ReadControlRegister(4); WriteControlRegister(4, CR4 ^ (1 << 18)); // Double check, because some processors... etc. if(CR4 & (1 << 18)) { // XSAVE enabled. // Now we do the checks for AVX and AVX512. AVXStub(); } else { // For some reason we weren't able to enable OSXSAVE. printf("Unable to set OSXSaVE.\r\n"); } } else { // XSAVE is not supported, so we cannot enable any AVX features. printf("XSAVE is not supported.\r\n"); } } } } /* All of this code is called twice when setting up AVX. The compiler would just use a jmp instruction anyway, but to make this file cleaner, I chose to put it here. This function is not visible to any other code. */ void AVXStub() { size_t RBX = 0, RCX = 0, RDX = 0; __asm__ __volatile__("cpuid" : // Instruction "=c" (RCX), "=d" (RDX) : // Outputs (our results come back through here) "a" (0x01) : // Inputs (to rax) "%rbx"); // (anti-)Clobber list // Bit 28 of RCX is the AVX bit - it says whether or not AVX (Advanced Vector Extensions) is available. if (RCX & (1 << 28)) { // If we get here, AVX is available, so we need to enable it. // To do that, we need to set bit 7 of XCR0 (eXtended Control Register 0) // So, read the current register.. size_t XCR0 = ReadExtendedControlRegister(0); // Write it back with bit 7 set WriteExtendedControlRegister(0, XCR0 | 0x7); // Double check it was set properly.. XCR0 = ReadExtendedControlRegister(0); if ((XCR0 & 0x7) == 0x7) { // AVX is now available, we can move on to AVX2 and AVX512. // Like before, we need to check first. __asm__ __volatile__("cpuid": // Instruction "=b" (RBX), "=c" (RCX), "=d" (RDX) : // Get our values out "a" (0x07), "c" (0x00) :); // Pass 0x07 to RAX and 0x00 to RCX. // Leaf 7/0 (0x07 in EAX and 0x00 in ECX) means Extended Features. // We're interested in bit 16, which is "avx512f", which says whether AVX-512 is available. if (RBX & (1 << 16)) { // If we get here, AVX512 is available, so we need to enable it. // To do that, we need to set bits 0xE7 of XCR0 (eXtended Control Register 0) size_t XCR0 = ReadExtendedControlRegister(0); WriteExtendedControlRegister(0, XCR0 | 0xE7); // Double check it was set properly.. if ((XCR0 & 0xE7) == 0xE7) { // AVX512 was enabled. We can now use parallel-optimised functions. FillScreen(Print_Info.defaultGPU, Print_Info.charBGColor); printf("AVX512 available and enabled.\r\n"); } else { // AVX512 was not enabled for some reason. printf("Unable to set AVX512. Please debug later.\r\n"); } // avx512f also includes a bunch of other AVX512 related features, so we should check those too. printf("Checking for other AVX512 features.."); if (RBX & (1 << 17)) { printf("AVX512-DQ is available.\r\n"); } if (RBX & (1 << 21)) { printf("AVX512-IFMA is available.\r\n"); } if (RBX & (1 << 26)) { printf("AVX512-PF is available.\r\n"); } if (RBX & (1 << 27)) { printf("AVX512-ER is available.\r\n"); } if (RBX & (1 << 28)) { printf("AVX512-CD is available.\r\n"); } if (RBX & (1 << 30)) { printf("AVX512-BW is available.\r\n"); } if (RBX & (1 << 31)) { printf("AVX512-VL is available.\r\n"); } if (RCX & 1) { printf("AVX512-VBMI is available.\r\n"); } if (RCX & (1 << 6)) { printf("AVX512-VBMI2 is available.\r\n"); } if (RCX & (1 << 11)) { printf("AVX512-VNNI is available.\r\n"); } if (RCX & (1 << 12)) { printf("AVX512-BITALG is available.\r\n"); } if (RCX & (1 << 14)) { printf("AVX512-VPOPCNTDQ is available.\r\n"); } if (RDX & (1 << 2)) { printf("AVX512-4VNNIW is available.\r\n"); } if (RDX & (1 << 3)) { printf("AVX512-4FMAPS is available.\r\n"); } printf("End of AVX512 features.\r\n"); } else { // If we get here, AVX512 is not supported. FillScreen(Print_Info.defaultGPU, Print_Info.charBGColor); printf("AVX/AVX2 supported and enabled.\r\nAVX512 is not supported.\r\n"); } // Now we can check for AVX2. // This is stored in bit 5 of RBX returned from CPUID. if (RBX & (1 << 5)) { printf("AVX2 is supported.\r\n"); } else { printf("AVX2 is not supported.\r\n"); } } else { // If we get here, AVX is supported but for whatever reason, we couldn't enable it. printf("Unable to enable AVX.\r\n"); } } else { // If we get here, AVX is not supported. // So, we check for the latest features of the CPU. printf("AVX is not supported.\r\nChecking for latest CPU features..\r\n"); // Bit 20 of RCX is sse4.2, which says whether SSE4.2 instructions are supported. if (RCX & (1 << 20)) { printf("SSE 4.2 is supported.\r\n"); } else { // This must be quite an old processor. // Bit 19 of RCX is sse4.1, which says whether SSE4.1 instructions are supported.. if (RCX & (1 << 19)) { printf("SSE 4.1 is supported.\r\n"); } else { // Going back in time.. // Bit 9 of RCX is ssse3, which says whether Supplemental SSE3 instructions are supported. if (RCX & (1 << 9)) { printf("SSE3 is supported.\r\n"); } else { // Gee gosh. Okay, there's another place we can check for SSE3.. // Bit 1 of RCX is sse3, which says whether Prescott SSE3 instructions are supported. if (RCX & 1) { printf("SSE3 is supported.\r\n"); } else { // Dear me. We might have an issue.. // Bit 26 of RDX is sse2, which says whether SSE2 instructions are supported. // SSE2 is required for a processor to be x86_64. if (RDX & (1 << 26)) { printf("SSE2 is supported.\r\n"); } else { // If we get here, the computer is a paradox. Or it isn't to spec. // This kernel is x86_64, therefore for a computer to load it, the processor must also be x86_64. // But if we get here, SSE2 is not supported, which is a requirement for x86_64. printf("Bad CPU detected - x86_64 requires SSE2 but the processor does not support it.\r\n"); } } } } } } } void PrepareMaskableInterrupts(void) { // Maskable Interrupts include things like keyboard inputs. // To enable them, we set a few flags in RFLAGS. size_t RFLAGS = ReadControlRegister('f'); // Bit 9 is IE (Interrupt Enable). if (RFLAGS & (1 << 9)) { printf("Interrupts are already enabled.\r\n"); // This should be the default state after booting from Syncboot. } else { // Write bit 9 into the register WriteControlRegister('f', RFLAGS | (1 << 9)); // Double check, some processors are tricky sometimes. size_t IE = ReadControlRegister('f'); if (RFLAGS == IE) { printf("Unable to enable interrupts.\r\n"); } else { printf("Interrupts enabled.\r\n"); } } } void PreparePowerManagement() { // The CPU can handle most power management features since Skylake. // We can enable this with Model-Specific registers. size_t RAX = 0; // To get whether or not Hardware Power Management is available, we need to check // CPUID. // To do *that*, we neeed to use ASM. // The terms are separated by ':'. // The first term is the instruction. // The second term is the outputs. There can and will be more than one, as such happened above. // The third term is inputs. Usually these are stored in "ekx", where k is one of a, b, c, d. // The forth and last term is the clobber list. Most instructions will clobber (leave garbage in) registers unless they are specified here. __asm__ __volatile__("cpuid" : "=a" (RAX) : "a" (0x06) : "%rbx", "%rcx", "%rdx"); // Now that we have our information, we cna do some checks. // Since we passed leaf 6 (0x06 in EAX) to CPUID, we get special information back in the registers. // We only told it to output to one register, RAX. // Thus, we can check the result in our RAX variable. // We're interested in bit 7, which tells us whether or not HWP is available. if (RAX & (1 << 7)) { // If we get here, HWP is available. // To know whether or not to *enable* HWP, we need to read a Model-Specific Register. if (ReadModelSpecificRegister(0x770) & 1) { printf("Hardware Power Management is enabled.\r\n"); } else { // If we get here, we need to enable it manually. To do this, we just set the bit we checked just now. WriteModelSpecificRegister(0x770, 1); // We should double check that the register was changed. In some cases, this means that HWP is either managed by ME, or the CPU is being funky. if(ReadModelSpecificRegister(0x770) & 1) { printf("Hardware Power Management has been enabled.\r\n"); // The message here is slightly different on purpose. } else { printf("Unable to set Hardware Power Management.\r\n"); } } } else { // Sadly, PowerManagement is not available for this processor. printf("Hardware Power Management is not supported.\r\n"); } } /* CheckForHypervisor: * If we're running in a virtual machine, a certain bit in CPUID is set. * This is set by the Hypervisor that runs the VM. * We can check it to see if we're running in a virtual machine. */ uint8_t CheckForHypervisor() { size_t RCX; __asm__ __volatile__("cpuid" : "=c" (RCX) : "a" (1) : "%rbx", "%rdx"); // Bit 31 of the 1st leaf tells us whether there is a hypervisor running. return (RCX & (1 << 31)); } uint8_t ReadPerformance(size_t* Perfs) { printf("Starting performance check..\r\n"); // We cannot read the performance MSRs in virtual machines. if(CheckForHypervisor()) { printf("Hypervisor detected. Unable to read performance.\r\n"); return 0; } // We need to disable interrupts first. // They will be enabled by the next function. size_t RFLAGS = ReadControlRegister('f'); if((RFLAGS & (1 << 9))) { WriteControlRegister('f', RFLAGS & ~(1 << 9)); } size_t IE = ReadControlRegister('f'); if(IE == RFLAGS) { printf("Unable to disable interrupts for reading performance. Results may be skewed.\r\n"); } // First we need to check for CPU-specific speed features. size_t SpeedCheck = ReadModelSpecificRegister(0x1A0); // Bit 16 is Enhanced SpeedStep. if(SpeedCheck & (1 << 16)) { printf("Enhanced SpeedStep is enabled.\r\n"); } // Bit 38 is Turbo Boost. if(SpeedCheck & (1ULL << 38)) { // Since this is larger than 32 bits, we need to specify Unsigned Long Long, which makes it 64 bits. printf("Turbo Boost is enabled.\r\n"); } Perfs[0] = ReadModelSpecificRegister(0xE8); Perfs[1] = ReadModelSpecificRegister(0xE7); return 1; } size_t ReadCPUFrequency(size_t* Perfs, uint8_t AverageOrDirect) { size_t RAX = 0, RBX = 0, RCX = 0, MaxLEAF = 0, APerf = 1, MPerf = 1; size_t RFLAGS = 0, TFLAGS = 0; if(AverageOrDirect == 1) { // Measure __asm__ __volatile__ ("cpuid":::"%rax", "%rbx", "%rcx", "%rdx"); size_t TAPerf = ReadModelSpecificRegister(0xE8); size_t TMPerf = ReadModelSpecificRegister(0xE7); APerf = TAPerf - Perfs[0]; MPerf = TMPerf - Perfs[0]; } else { // Average (since last poweroff) ReadPerformance(Perfs); } __asm__ __volatile__("cpuid" : "=a" (MaxLEAF) : "a" (0) : "%rbx", "%rcx", "%rdx"); size_t BusMultiplier = (ReadModelSpecificRegister(0xCE) & 0xFF00) >> 8; size_t TurboSpeedControlFrequency = BusMultiplier * 100; if(MaxLEAF >= 0x15) { __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX) : "a" (0x15) : "%rdx"); if((RCX) && (RBX)) { // RCX contains the nominal clock frequency return ((RCX / 1000000) * RBX * APerf) / (RAX * MPerf); } if(RCX == 0) { size_t Val; __asm__ __volatile__("cpuid" : "=a" (Val) : "a" (1) : "%rbx", "%rcx", "%rdx"); if( ((Val & 0xF0FF0) == 0x906E0) || ((Val & 0xF0FF0) == 0x806E0) || ((Val & 0xF0FF0) == 0x506E0) || ((Val & 0xF0FF0) == 0x406E0)) { return (24 * RBX * APerf) / (RAX * MPerf); } // There are far more edge cases here. Maybe peek at the Linux kernel? } } // CPUID is not useful. So, fall back to the Sandybridge method. size_t Frequency = (TurboSpeedControlFrequency * APerf) / MPerf; // Re-enable interrupts. Assuming they're disabled by the prior function. RFLAGS = ReadControlRegister('f'); WriteControlRegister('f', RFLAGS | (1 << 9)); TFLAGS = ReadControlRegister('f'); if(TFLAGS == RFLAGS) { printf("Unable to enable interrupts after reading performance.\r\n"); } printf("CPU Frequency is %llu\r\n", Frequency); return Frequency; } uint32_t ReadPort(uint16_t Port, int Length) { uint32_t Data; if(Length == 1) { // Read a byte __asm__ __volatile__("inb %[address], %[value]" : : [value] "a" ((uint8_t) Data), [address] "d" (Port) :); } else if (Length == 2) { // Read a word __asm__ __volatile__("inw %[address], %[value]" : : [value] "a" ((uint16_t) Data), [address] "d" (Port) :); } else if (Length == 4) { // Read a long (dword) __asm__ __volatile__("inl %[address], %[value]" : : [value] "a" (Data), [address] "d" (Port) :); } else { printf("ReadPort: Invalid Read Length.\r\n"); } return Data; } uint32_t WritePort(uint16_t Port, uint32_t Data, int Length) { if(Length == 1) { // Write a byte __asm__ __volatile__("outb %[value], %[address]" : : [value] "a" ((uint8_t) Data), [address] "d" (Port) :); } else if (Length == 2) { // Write a word __asm__ __volatile__("outw %[value], %[address]" : : [value] "a" ((uint16_t) Data), [address] "d" (Port) :); } else if (Length == 4) { // Write a long (dword) __asm__ __volatile__("outl %[value], %[address]" : : [value] "a" (Data), [address] "d" (Port) :); } else { printf("WritePort: Invalid Write Length.\r\n"); } return Data; } size_t ReadModelSpecificRegister(size_t MSR) { size_t RegHigh = 0, RegLow = 0; __asm__ __volatile__("rdmsr" : "=a" (RegLow), "=d" (RegHigh) : "c" (MSR) :); return (RegHigh << 32 | RegLow); } size_t WriteModelSpecificRegister(size_t MSR, size_t Data) { size_t DataLow = 0, DataHigh = 0; DataLow = ((uint32_t* )&Data)[0]; DataHigh = ((uint32_t* )&Data)[1]; __asm__ __volatile__("wrmsr" : : "a" (DataLow), "c" (MSR), "d" (DataHigh) : ); return Data; } // VMXCSR - Vex-Encoded MXCSR. These are preferred when AVX is available. uint32_t ReadVexMXCSR() { uint32_t Data; __asm__ __volatile__("vstmxcsr %[dest]" : [dest] "=m" (Data) : :); return Data; } uint32_t WriteVexMXCSR(uint32_t Data) { __asm__ __volatile__("vldmxcsr %[src]" : : [src] "m" (Data) :); return Data; } // MXCSR - SSE Control Register. uint32_t ReadMXCSR() { uint32_t Data; __asm__ __volatile__("stmxcsr %[dest]" : [dest] "=m" (Data) : :); return Data; } uint32_t WriteMXCSR(uint32_t Data) { __asm__ __volatile__("ldmxcsr %[src]" : : [src] "m" (Data) :); return Data; } // Control Register : CRX + RFLAGS. Specify 'f' for RFLAGS, X for CRX. size_t ReadControlRegister(int CRX) { size_t Data; switch(CRX) { case 0: __asm__ __volatile__("mov %%cr0, %[dest]" : [dest] "=r" (Data) : :); break; case 1: __asm__ __volatile__("mov %%cr1, %[dest]" : [dest] "=r" (Data) : :); break; case 2: __asm__ __volatile__("mov %%cr2, %[dest]" : [dest] "=r" (Data) : :); break; case 3: __asm__ __volatile__("mov %%cr3, %[dest]" : [dest] "=r" (Data) : :); break; case 4: __asm__ __volatile__("mov %%cr4, %[dest]" : [dest] "=r" (Data) : :); break; case 8: __asm__ __volatile__("mov %%cr8, %[dest]" : [dest] "=r" (Data) : :); break; case 'f': // Push flags and pop them into our buffer __asm__ __volatile__("pushfq\n\t" "popq %[dest]" : [dest] "=r" (Data) : :); break; default: break; } return Data; } size_t WriteControlRegister(int CRX, size_t Data) { switch(CRX) { case 0: __asm__ __volatile__("mov %[dest], %%cr0" : : [dest] "r" (Data) :); break; case 1: __asm__ __volatile__("mov %[dest], %%cr1" : : [dest] "r" (Data) :); break; case 2: __asm__ __volatile__("mov %[dest], %%cr2" : : [dest] "r" (Data) :); break; case 3: __asm__ __volatile__("mov %[dest], %%cr3" : : [dest] "r" (Data) :); break; case 4: __asm__ __volatile__("mov %[dest], %%cr4" : : [dest] "r" (Data) :); break; case 8: __asm__ __volatile__("mov %[dest], %%cr8" : : [dest] "r" (Data) :); break; case 'f': __asm__ __volatile__("pushq %[dest]\n\t" "popfq" : : [dest] "r" (Data) : "cc"); break; default: break; } return Data; } // XCR = eXtended Control Register. // XCR0 is used to enable AVX/SSE. size_t ReadExtendedControlRegister(size_t XCRX) { size_t RegHigh = 0, RegLow = 0; __asm__ __volatile__("xgetbv" : "=a" (RegLow), "=d" (RegHigh) : "c" (XCRX) :); return (RegHigh << 32 | RegLow); } size_t WriteExtendedControlRegister(size_t XCRX, size_t Data) { __asm__ __volatile__("xsetbv" : : "a" ( ((uint32_t*)&Data)[0]), "c" (XCRX), "d" ( ((uint32_t*)&Data)[1] ) :); return Data; } // The following two functions are utility - for determining whether we're operating in Long Mode. // TODO: Move into DescriptorTables.c size_t ReadXCS() { size_t Data = 0; __asm__ __volatile__("mov %%cs, %[dest]" : [dest] "=r" (Data) : :); return Data; } DESCRIPTOR_TABLE_POINTER FetchGDT() { DESCRIPTOR_TABLE_POINTER GDTrData = {0}; __asm__ __volatile__("sgdt %[dest]" : [dest] "=m" (GDTrData) : :); return GDTrData; } void SetGDT(DESCRIPTOR_TABLE_POINTER GDTrData) { __asm__ __volatile__("lgdt %[src]" : : [src] "m" (GDTrData) :); } DESCRIPTOR_TABLE_POINTER FetchIDT() { DESCRIPTOR_TABLE_POINTER IDTrData = {0}; __asm__ __volatile__("sidt %[dest]" : [dest] "=m" (IDTrData) : :); return IDTrData; } void SetIDT(DESCRIPTOR_TABLE_POINTER IDTrData) { __asm__ __volatile__("lidt %[src]" : : [src] "m" (IDTrData) :); } // LDT = Local Descriptor Table (= GDT entry for current segment) uint16_t FetchLDT() { uint16_t LDTrData = 0; __asm__ __volatile__("sldt %[dest]" : [dest] "=m" (LDTrData) : :); return LDTrData; } void SetLDT(uint16_t LDTrData) { __asm__ __volatile__("lldt %[src]" : : [src] "m" (LDTrData) :); } // TSR - Tast State Register uint16_t FetchTSR() { uint16_t TSRData = 0; __asm__ __volatile__ ("str %[dest]" : [dest] "=m" (TSRData) : :); return TSRData; } void SetTSR(uint16_t TSRData) { __asm__ __volatile__("ltr %[src]" : : [src] "m" (TSRData) :); } void InstallGDT() { DESCRIPTOR_TABLE_POINTER GDTData = {0}; size_t TSS64Address = (size_t)&TSS64; uint16_t TSSBase1 = (uint16_t)TSS64Address; uint8_t TSSBase2 = (uint8_t)(TSS64Address >> 16); uint8_t TSSBase3 = (uint8_t)(TSS64Address >> 24); uint32_t TSSBase4 = (uint8_t)(TSS64Address >> 32); GDTData.Limit = sizeof(InitialGDT) - 1; GDTData.BaseAddress = (size_t)InitialGDT; ((TSS_ENTRY*) &((GDT_ENTRY*)InitialGDT)[3])->BaseLow = TSSBase1; ((TSS_ENTRY*) &((GDT_ENTRY*)InitialGDT)[3])->BaseMiddle1 = TSSBase2; ((TSS_ENTRY*) &((GDT_ENTRY*)InitialGDT)[3])->BaseMiddle2 = TSSBase3; ((TSS_ENTRY*) &((GDT_ENTRY*)InitialGDT)[3])->BaseHigh = TSSBase4; SetGDT(GDTData); SetTSR(0x18); // 0x18 >> 3 == GDT[3] UpdateSegments(); } static void UpdateSegments() { // We can't use the ASM method in x86_64. AKA, we cannot far jump into the code segment to update CS. // As such, we have to do some juggling with registers. // This will look insane. __asm__ __volatile__ ("mov $16, %ax\n\t" // 16 >> 3 = GDT[2] = Data Segment "mov %ax, %ds\n\t" "mov %ax, %es\n\t" "mov %ax, %fs\n\t" "mov %ax, %gs\n\t" "mov %ax, %ss\n\t" "movq $8, %rdx\n\t" // 8 >> 3 = GDT[1] == Code Segment "leaq 4(%rip), %rax\n\t" // Store the instruction immediately after iretq. "pushq %rdx\n\t" "pushq %rax\n\r" "lretq\n\t"); // the instruction stored here points to here, so execution continues immediately after the return. // This way, the compiler is happy, and we're now in the new segments. } // Exception stacks #define PAGE (1 << 12) __attribute((aligned(64))) static volatile uint8_t NMIStack[PAGE] = {0}; __attribute((aligned(64))) static volatile uint8_t DoubleFaultStack[PAGE] = {0}; __attribute((aligned(64))) static volatile uint8_t MachineCheckStack[PAGE] = {0}; __attribute((aligned(64))) static volatile uint8_t BreakPointStack[PAGE] = {0}; void InstallIDT() { DESCRIPTOR_TABLE_POINTER IDT_Data = {0}; IDT_Data.Limit = sizeof(IDTData) - 1; IDT_Data.BaseAddress = (size_t) IDTData; TSS64.IST1 = (size_t) NMIStack; TSS64.IST2 = (size_t) DoubleFaultStack; TSS64.IST3 = (size_t) MachineCheckStack; TSS64.IST4 = (size_t) BreakPointStack; // Set the gates InstallInterrupt(0, (size_t) ISR0Handler); InstallInterrupt(1, (size_t) ISR1Handler); InstallInterrupt(2, (size_t) ISR2Handler); InstallInterrupt(3, (size_t) ISR3Handler); InstallInterrupt(4, (size_t) ISR4Handler); InstallInterrupt(5, (size_t) ISR5Handler); InstallInterrupt(6, (size_t) ISR6Handler); InstallInterrupt(7, (size_t) ISR7Handler); InstallInterrupt(8, (size_t) ISR8Handler); InstallInterrupt(9, (size_t) ISR9Handler); InstallInterrupt(10, (size_t) ISR10Handler); InstallInterrupt(11, (size_t) ISR11Handler); InstallInterrupt(12, (size_t) ISR12Handler); InstallInterrupt(13, (size_t) ISR13Handler); InstallInterrupt(14, (size_t) ISR14Handler); InstallInterrupt(15, (size_t) ISR15Handler); InstallInterrupt(16, (size_t) ISR16Handler); InstallInterrupt(17, (size_t) ISR17Handler); InstallInterrupt(18, (size_t) ISR18Handler); InstallInterrupt(19, (size_t) ISR19Handler); InstallInterrupt(20, (size_t) ISR20Handler); InstallInterrupt(30, (size_t) ISR30Handler); // 21 - 31 are reserved. for(size_t i = 1; i < 11; i++) { if( i != 9 ) { // Don't want to overwrite ISR30 InstallInterrupt(i + 20, (size_t)ReservedISRHandler); } } // Put custom ISRs here. SetIDT(IDT_Data); } // Sets the correct entry in the IDT. // Might be worth looking into using IST stack switching. static void InstallInterrupt(size_t ISR, size_t Address) { uint16_t ISRBase1 = (uint16_t) Address; uint16_t ISRBase2 = (uint16_t) (Address >> 16); uint32_t ISRBase3 = (uint16_t) (Address >> 32); IDTData[ISR].LowBase = ISRBase1; IDTData[ISR].Segment = 0x08; // Code Segment IDTData[ISR].IST = 0; IDTData[ISR].SegmentType = 0x8E; // Interrupt IDTData[ISR].MiddleBase = ISRBase2; IDTData[ISR].HighBase = ISRBase3; IDTData[ISR].Reserved = 0; } // Set up paging in the CPU. // This is going to be the most changed thing, so it should be moved to its own file. #define PAGETABLE_SIZE 512 * 8 void InstallPaging() { // Before we start, we need to write a control register. // Bit 7 of CR4 is PGE (Page Global Enabled), which says whtether address translations can be used across address spaces // AKA, it says whether or not the page table is an identity map (1:1 relation between page table to RAM) size_t CR4 = ReadControlRegister(4); if(CR4 & (1 << 7)) { // If bit 7 is set // We need to turn it off, otherwise UEFI will leave stuff behind. WriteControlRegister(4, CR4 ^ (1 << 7)); // Double check it was set size_t PGE = ReadControlRegister(4); if(PGE == CR4) { printf("Error disabling Page Global.\r\n"); } } // Before we start mapping memory, we need to know how much there is. size_t MaxMemory = FetchMemoryLimit(); // We also need to know how big the pages can be. // CPUID can do this for us. size_t RDX = 0; __asm__ __volatile__("cpuid" : "=d" (RDX) : "a" (0x80000001) : "%rbx", "%rcx"); if(RDX & (1 << 26)) { printf("Using 1GB pages.\r\n"); // For future proofing, we can check if Level-5 paging is enabled. // To do this, we check bit 12 (LA57) of CR4. CR4 = ReadControlRegister(4); if (CR4 & (1 << 12)) { // We can use 5 level paging. printf("Using 5-Level paging.\r\n"); // We need to check, just in case. if(MaxMemory > (1ULL << 57)) { // 1 << 57 = 128PB printf("Max RAM is 128PB. Please consider using a better OS with your supercomputer.\r\n"); printf("This isn't an error. You'll just be limited to 128PB. *just*.\r\n"); } // PML = Page Map Level // Keeps track of how many PML5 entries there are size_t MaxPML5 = 1; // Keeps track of how many PML4 entries there are size_t MaxPML4 = 1; // Keeps track of how many PDP entries there are size_t MaxPDP = 512; // Going to top-down search from 512 size_t LastPL4Entry = 512; size_t LastPDPEntry = 512; // PML5 entries can track 256PB of RAM. So we need to count how many x256PB sections can fit into memory. // This is usually one. while (MaxMemory > (256ULL << 30)) { MaxPML5++; MaxMemory -= (256ULL << 30); } // We can't have more than 512 entries. This is a *lot* of RAM, though. if (MaxPML5 > 512) { MaxPML5 = 512; } // We will always need to make sure the rest of memory is paged. if (MaxMemory) { // (MaxMemory + ((1 << 30) - 1 )) undoes the MaxMemory -= (256ULL << 30) from earlier. // Masking with ~0ULL (64 bits of 1) makes sure that all of the available memory is captured // Shifting it back 30 bits allows us to capture the memory address. LastPDPEntry = ( (MaxMemory + ((1 << 30) - 1)) & (~0ULL << 30)) >> 30; // We need to truncate again. if (MaxPML5 > 512) { MaxPML5 = 512; } } // Now we need to calculate how much space the page tables themselves will consume. size_t PML4Size = PAGETABLE_SIZE * MaxPML5; size_t PDPSize = PML4Size * MaxPML4; EFI_PHYSICAL_ADDRESS PML4Base = AllocatePagetable(PML4Size + PDPSize); EFI_PHYSICAL_ADDRESS PDPBase = PML4Base + PML4Size; // Now we know how big the tables are, and where they are, we can start populating them. for(size_t PML5Entry = 0; PML5Entry < MaxPML5; PML5Entry++) { // Set PML4, make sure it's page-aligned. FirstPageTable[PML5Entry] = PML4Base + (PML5Entry << 12); if (PML5Entry == (MaxPML5 - 1)) { MaxPML4 = LastPL4Entry; } for(size_t PML4Entry = 0; PML4Entry < MaxPML4; PML4Entry++) { ((size_t* )FirstPageTable[PML5Entry])[PML4Entry] = PDPBase + (((PML5Entry << 9) + PML4Entry) << 12); if( (PML5Entry == (MaxPML5 - 1)) && (PML4Entry == (MaxPML4 - 1)) ) { MaxPDP = LastPDPEntry; } for(size_t PDPEntry = 0; PDPEntry < MaxPDP; PDPEntry++) { // This is the table that defines the 1GB pages. // There will only be 1 PML4 entry, unless the system has >512GB of RAM. // There will only be 1 PML5 entry, unless the system has >256TB of RAM. ((size_t* ) ((size_t* )FirstPageTable[PML5Entry])[PML4Entry])[PDPEntry] = ( ((PML5Entry << 18) + (PML4Entry << 9) + PDPEntry) << 30) | (0x83); } // Set R/W and P to 1 ((size_t* )FirstPageTable[PML5Entry])[PML4Entry] |= 0x3; } // Set R/W and P to 1 FirstPageTable[PML5Entry] |= 0x3; } } else { // 5-level paging isn't supported. // We can use 4-level paging. // It supports up to 256TB of RAM, which is overkill. printf("4-Level paging enabled.\r\n"); if(MaxMemory > (1ULL << 48)) { printf("RAM will be limited to 256TB.\r\n"); } size_t MaxPML4 = 1; size_t MaxPDP = 512; size_t LastPDPEntry = 512; // Each PML4 entry is for a whole 512GB of RAM. // Again, usually there will only be one of these. while(MaxMemory > (512ULL << 30)) { MaxPML4++; MaxMemory -= (512ULL << 30); } if(MaxPML4 > 512) { MaxPML4 = 512; } // Start paging the rest of memory if(MaxMemory) { LastPDPEntry = ( (MaxMemory + ((1 << 30) - 1)) & (~0ULL << 30)) >> 30; if(LastPDPEntry > 512) { LastPDPEntry = 512; } } size_t PDPSize = PAGETABLE_SIZE * MaxPML4; EFI_PHYSICAL_ADDRESS PDPBase = AllocatePagetable(PDPSize); for(size_t PML4Entry = 0; PML4Entry < MaxPML4; PML4Entry++ ) { FirstPageTable[PML4Entry] = PDPBase + (PML4Entry << 12); if(PML4Entry == (MaxPML4 - 1)) { MaxPDP = LastPDPEntry; } for(size_t PDPEntry = 0; PDPEntry < MaxPDP; PDPEntry++) { ((size_t* )FirstPageTable[PML4Entry])[PDPEntry] = (((PML4Entry << 9) + PDPEntry) << 30) | 0x83; } FirstPageTable[PML4Entry] |= 0x3; } } } else { // We can't use 1GiB pages. Fall back to 2MiB pages. printf("1GiB pages are not supported. Falling back to 2MiB pages.\r\n"); if(MaxMemory > (1ULL << 48)) { printf("RAM will be limited to 256TB. Page tables will occupy 1GiB of space.\r\nHowever, that is only 1/500000 of the available space.\r\n"); } size_t MaxPML4 = 1; size_t MaxPDP = 512; size_t MaxPD = 512; size_t LastPDPEntry = 1; while(MaxMemory > (512ULL << 30)) { MaxPML4++; MaxMemory -= (512ULL << 30); } if(MaxPML4 > 512) { MaxPML4 = 512; } if(MaxMemory) { LastPDPEntry = ((MaxMemory + ((1 << 30) - 1)) & (~0ULL << 30)) > 30; if(LastPDPEntry > 512) { LastPDPEntry = 512; } } size_t PDPSize = PAGETABLE_SIZE * MaxPML4; size_t PDSize = PDPSize * MaxPDP; EFI_PHYSICAL_ADDRESS PDPBase = AllocatePagetable(PDPSize + PDSize); EFI_PHYSICAL_ADDRESS PDBase = PDPBase + PDSize; for(size_t PML4Entry = 0; PML4Entry < MaxPML4; PML4Entry++) { FirstPageTable[PML4Entry] = PDBase + (PML4Entry << 12); if(PML4Entry == (MaxPML4 - 1)) { MaxPDP = LastPDPEntry; } for(size_t PDPEntry = 0; PDPEntry < MaxPDP; PDPEntry++) { ((size_t* )FirstPageTable[PML4Entry])[PDPEntry] = PDBase + (((PML4Entry << 9) + PDPEntry) << 12); for(size_t PDEntry = 0; PDEntry < MaxPD; PDEntry++) { ((size_t* )((size_t* )FirstPageTable[PML4Entry])[PDPEntry])[PDEntry] == (((PML4Entry << 18) + (PDPEntry << 9) + PDEntry) << 21) | 0x83; } ((size_t* )FirstPageTable[PML4Entry])[PDPEntry] |= 0x3; } FirstPageTable[PML4Entry] |= 0x3; } } WriteControlRegister(3, (size_t)FirstPageTable); // Hyper-V has an issue with this line. // TODO: Look into this. // Now that we've set the page table, we can re-enable Page Global. CR4 = ReadControlRegister(4); if(!(CR4 & (1 << 7))) { WriteControlRegister(4, CR4 ^ (1 << 7)); if(ReadControlRegister(4) == CR4) { printf("Error setting CR4.PGE."); } } } // Gets the name of the processor. char* FetchBrandStr(uint32_t* String) { size_t RAX = 0, RBX = 0, RCX = 0, RDX = 0; // This is done using our old friend CPUID. // This clobbers every register, so we need to use them all. __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (0x80000000) :); // From the Wiki article on CPUID: It is necessary to check whether the feature is present in the CPU by issuing CPUID with EAX = 80000000h first and checking if the returned value is greater or equal to 80000004h. if(RAX >= 0x80000004) { // To get the full 48-byte string, we need to call CPUID with: // 80000002 // 80000003 // 80000004 // In sequence. __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (0x8000002) :); BrandStr[0] = ((uint32_t*) &RAX)[0]; BrandStr[1] = ((uint32_t*) &RBX)[0]; BrandStr[2] = ((uint32_t*) &RCX)[0]; BrandStr[3] = ((uint32_t*) &RDX)[0]; __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (0x8000003) :); BrandStr[4] = ((uint32_t*) &RAX)[0]; BrandStr[5] = ((uint32_t*) &RBX)[0]; BrandStr[6] = ((uint32_t*) &RCX)[0]; BrandStr[7] = ((uint32_t*) &RDX)[0]; __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (0x8000004) :); BrandStr[8] = ((uint32_t*) &RAX)[0]; BrandStr[9] = ((uint32_t*) &RBX)[0]; BrandStr[10] = ((uint32_t*) &RCX)[0]; BrandStr[11] = ((uint32_t*) &RDX)[0]; return (char* )BrandStr; } else { // Brand String not supported. // TODO: Maybe some tests to try to figure out the processor manually? printf("BrandStr not supported by the processor.\r\n"); return NULL; } } /* The following are known processor manufacturer ID strings: "AMDisbetter!" – early engineering samples of AMD K5 processor "AuthenticAMD" – AMD "CentaurHauls" – Centaur (Including some VIA CPU) "CyrixInstead" – Cyrix "HygonGenuine" – Hygon "GenuineIntel" – Intel "TransmetaCPU" – Transmeta "GenuineTMx86" – Transmeta "Geode by NSC" – National Semiconductor "NexGenDriven" – NexGen "RiseRiseRise" – Rise "SiS SiS SiS " – SiS "UMC UMC UMC " – UMC "VIA VIA VIA " – VIA "Vortex86 SoC" – Vortex The following are known ID strings from virtual machines: "bhyve bhyve " – bhyve "KVMKVMKVM" – KVM "Microsoft Hv" – Microsoft Hyper-V or Windows Virtual PC " lrpepyh vr" – Parallels (it possibly should be "prl hyperv ", but it is encoded as " lrpepyh vr" due to an endianness mismatch) "VMwareVMware" – VMware "XenVMMXenVMM" – Xen HVM "ACRNACRNACRN" - Project ACRN */ char* FetchManufacturer(char* ManufacturerStr) { size_t RBX = 0, RCX = 0, RDX = 0; // The manufacturer string is in the first leaf. __asm__ __volatile__("cpuid" : "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (0) :); ManufacturerStr[0] = ((char* )&RBX)[0]; ManufacturerStr[1] = ((char* )&RBX)[1]; ManufacturerStr[2] = ((char* )&RBX)[2]; ManufacturerStr[3] = ((char* )&RBX)[3]; ManufacturerStr[4] = ((char* )&RDX)[0]; ManufacturerStr[5] = ((char* )&RDX)[1]; ManufacturerStr[6] = ((char* )&RDX)[2]; ManufacturerStr[7] = ((char* )&RDX)[3]; ManufacturerStr[8] = ((char* )&RCX)[0]; ManufacturerStr[9] = ((char* )&RCX)[1]; ManufacturerStr[10] = ((char* )&RCX)[2]; ManufacturerStr[11] = ((char* )&RCX)[3]; ManufacturerStr[12] = '\0'; return ManufacturerStr; } void ScanCPUFeatures(size_t RAXIn, size_t RCXIn) { size_t RAX = 0, RBX = 0, RCX = 0, RDX = 0; if(RAXIn == 1) { // Scan CPU Features (duh) // This should be the default __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (1) :); if(RCX & (1 << 31)) { printf("Sync is being run in a Hypervisor.\r\n"); } if(RCX & (1 << 12)) { printf("Processor supports FMA."); } else { printf("Processor does not support FMA.\r\n"); } if(RCX & 1) { if(RCX & (1 << 25)) { printf("AESNI + PCLMULQDQ supported.\r\n"); } else { printf("PCLMULQDQ supported, but not AESNI.\r\n"); } } AVXStub(); if(RCX & (1 << 29)) { printf("F16C supported.\r\n"); } if(RDX & (1 << 22)) { printf("ACPI via MSR supported.\r\n"); } else { printf("ACPI via MSR not supported.\r\n"); } if(RDX & (1 << 24)) { printf("FXSR supported.\r\n"); } } else if(RAXIn == 7 && RCXIn == 0) { // AVX Features AVXStub(); } else if(RAXIn == 0x80000000) { // Processor Brand String char* Brand[48] = {0}; FetchBrandStr(&Brand); printf("Processor Brand: %.48sr\\n", Brand); } else if(RAXIn == 0x8000001) { // Paging features __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (RAXIn) :); if(RDX & (1 << 26)) { printf("1GiB pages supported.\r\n"); } else { printf("1GiB pages are not supported.\r\n"); } if(RDX & (1 << 29)) { printf("Long Mode is supported.\r\n"); } } else { // Just do what is asked of us. __asm__ __volatile__("cpuid" : "=a" (RAX), "=b" (RBX), "=c" (RCX), "=d" (RDX) : "a" (RAXIn) :); printf("rax: %#qx\r\nrbx: %#qx\r\nrcx: %#qx\r\nrdx: %#qx\r\n", RAX, RBX, RCX, RDX); } }