Skip to content

Commit b69d0da

Browse files
Andy Rossnashif
authored andcommitted
arch/x86_64: New architecture added
This patch adds an x86_64 architecture and qemu_x86_64 board to Zephyr. Only the basic architecture support needed to run 64 bit code is added; no drivers are added, though a low-level console exists and is wired to printk(). The support is built on top of an "X86 underkernel" layer, which can be built in isolation as a unit test on a Linux host. Limitations: + Right now the SDK lacks an x86_64 toolchain. The build will fall back to a host toolchain if it finds no cross compiler defined, which is tested to work on gcc 8.2.1 right now. + No x87/SSE/AVX usage is allowed. This is a stronger limitation than on other architectures, where the instructions work from one thread even if the context switch code doesn't support them. We are passing -mno-sse to prevent gcc from automatically generating SSE instructions for non-floating-point purposes, which has the side effect of changing the ABI. Future work to handle the FPU registers will need to be combined with an "application" ABI distinct from the kernel one (or just to require USERSPACE). + Paging is enabled (it has to be, in long mode), but is a 1:1 mapping of all memory. No MMU/USERSPACE support yet. + We are building with -mno-red-zone for stack size reasons, but the red zone is a valuable optimization. Enabling it requires automatic stack switching, which requires a TSS, which means it has to happen after MMU support. + The OS runs in 64 bit mode, but for compatibility reasons is compiled to the 32 bit "X32" ABI. So while the full 64 bit registers and instruction set are available, C pointers are 32 bits long and Zephyr is constrained to run in the bottom 4G of memory. Signed-off-by: Andy Ross <[email protected]>
1 parent ff0ab5d commit b69d0da

File tree

55 files changed

+2890
-18
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+2890
-18
lines changed

arch/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ config X86
3232
select ATOMIC_OPERATIONS_BUILTIN
3333
select HAS_DTS
3434

35+
# 64 bit x86 architecture.  Atomic operations come from the compiler
# builtins (ATOMIC_OPERATIONS_BUILTIN).
config X86_64
	bool
	prompt "x86_64 architecture"
	select ATOMIC_OPERATIONS_BUILTIN
38+
3539
config NIOS2
3640
bool "Nios II Gen 2 architecture"
3741
select ATOMIC_OPERATIONS_C

arch/x86_64/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Compiler flags common to every x86_64 build step.  The list is defined
# before descending into core/ because the 16 and 32 bit stub rules there
# reuse it.
#
#   -mno-sse       keeps gcc from emitting SSE instructions on its own
#   -mno-red-zone  no red zone below the stack pointer
set(X86_64_BASE_CFLAGS
  -ffreestanding
  -fno-pic
  -fno-asynchronous-unwind-tables
  -mno-sse
  -mno-red-zone
  )

add_subdirectory(core)

# The 64 bit code itself is compiled and linked for the x32 ABI.
zephyr_compile_options(
  ${X86_64_BASE_CFLAGS}
  -mx32
  )

zephyr_link_libraries(-mx32)

arch/x86_64/Kconfig

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Kconfig options specific to the x86_64 architecture port.

config ARCH
	default "x86_64"

# A bool symbol without a default is already "n", so no explicit
# "default n" is given here.
config XUK_DEBUG
	bool "Debug logging at lowest level"
	help
	  When true, enables debug logging from the XUK layer in very
	  early boot situations (including the 16 and 32 bit stub
	  code) on the first serial port (115200 8n1) and VGA text
	  console.  Also wires that output stream to the printk()
	  function so it can be used before any console drivers are
	  initialized.

config XUK_APIC_TSC_SHIFT
	int "Power-of-two divisor between TSC and APIC timer"
	default 6
	help
	  Configures the precision of the APIC timer as a bit shift of
	  the TSC frequency.  High values "slow down" the tick rate of
	  the APIC timer and allow for longer timeouts at the expense
	  of precision.

config IRQ_OFFLOAD_VECTOR
	int "Interrupt vector for irq_offload"
	default 255
	help
	  This is the interrupt vector to use for the self-directed
	  IPIs used to implement irq_offload().  Most apps will never
	  change this.  It's configurable in case someone wants to
	  play with its priority.

arch/x86_64/core/CMakeLists.txt

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Architecture library for x86_64.
zephyr_library()

# Directory that receives the generated 16/32 bit stub binaries.
set(incdir ${PROJECT_BINARY_DIR}/include/generated)

zephyr_library_sources(
  x86_64.c
  xuk.c
  xuk-stubs-copy.c # <-- generated, see below
  )
10+
11+
# We want to include two non-x86_64 stubs as sections/symbols in our
# link (one 16 bit code for SMP real mode bootstrapping, the other a 32
# bit hook for OS protected mode entry).  This is tedious to do with
# the linker directly, so the mechanism picked here is to have a C
# file (which really is all assembly) import them with ".incbin"
# statements.  A C source cannot be given extra dependencies directly,
# so the file is copied into the build directory, where the copy lives
# as a separate dependency node we control.
#
# The copy depends on the original xuk-stubs.c as well as on both stub
# binaries, so editing the source refreshes the copy instead of leaving
# a stale one behind.
add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xuk-stubs-copy.c
  COMMAND ${CMAKE_COMMAND} -E copy
          ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stubs.c
          ${CMAKE_CURRENT_BINARY_DIR}/xuk-stubs-copy.c
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stubs.c
          ${incdir}/xuk-stub16.bin
          ${incdir}/xuk-stub32.bin
  VERBATIM
  )
28+
29+
# 16 bit stub: compile xuk-stub16.c with -m16, then extract its .text
# section as a flat binary into ${incdir}.
#
# The C source and the autoconf header passed via -imacros are listed as
# dependencies so the binary is regenerated when either changes.  Headers
# included by the stub are not tracked here.
add_custom_command(
  OUTPUT ${incdir}/xuk-stub16.bin
  COMMAND ${CMAKE_C_COMPILER} -m16 -Os ${X86_64_BASE_CFLAGS}
          -imacros ${AUTOCONF_H}
          -c ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub16.c
          -o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub16.o
  COMMAND ${CMAKE_OBJCOPY} -O binary -j .text
          ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub16.o
          ${incdir}/xuk-stub16.bin
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub16.c
          ${AUTOCONF_H}
  VERBATIM
  )
38+
39+
# 32 bit stub: compile xuk-stub32.c with -m32, link it on its own with
# the xuk-stub32.ld linker script, then flatten the ELF into a raw binary
# in ${incdir}.
#
# The C source, the linker script and the autoconf header passed via
# -imacros are listed as dependencies so the binary is regenerated when
# any of them changes.  Headers included by the stub are not tracked here.
add_custom_command(
  OUTPUT ${incdir}/xuk-stub32.bin
  COMMAND ${CMAKE_C_COMPILER} -m32 -Os ${X86_64_BASE_CFLAGS}
          -imacros ${AUTOCONF_H}
          -c ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.c
          -o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.o
  COMMAND ${CMAKE_C_COMPILER} -m32 ${X86_64_BASE_CFLAGS}
          -Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles
          -T ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.ld
          ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.o
          -o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.elf
  COMMAND ${CMAKE_OBJCOPY} -O binary
          ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.elf
          ${incdir}/xuk-stub32.bin
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.c
          ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.ld
          ${AUTOCONF_H}
  VERBATIM
  )
53+
54+
# The zephyr.elf file generated for an x86_64 binary is a 64 bit
# binary, but Qemu requires a traditional i386 file (because the entry
# point from multiboot is in 386 protected mode).  Do a relink dance
# with objcopy to convert: flatten zephyr.elf to a raw binary, compile a
# copy of qemuinc.c as a 32 bit object, and link that object with
# xuk64.ld.  Note use of the same .incbin trick with copy as for the
# stub binaries in this file.
#
# qemuinc.c and xuk64.ld are listed as dependencies so that edits to
# either one trigger a relink.
set(qkernel_file ${CMAKE_BINARY_DIR}/zephyr-qemu.elf)
add_custom_target(qemu_kernel_target DEPENDS ${qkernel_file})
add_custom_command(
  OUTPUT ${qkernel_file}
  DEPENDS zephyr_prebuilt
          ${CMAKE_CURRENT_SOURCE_DIR}/qemuinc.c
          ${CMAKE_CURRENT_SOURCE_DIR}/xuk64.ld
  COMMAND ${CMAKE_OBJCOPY} -O binary
          ${CMAKE_BINARY_DIR}/zephyr/zephyr.elf
          ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.bin
  COMMAND ${CMAKE_COMMAND} -E copy
          ${CMAKE_CURRENT_SOURCE_DIR}/qemuinc.c
          ${CMAKE_CURRENT_BINARY_DIR}
  COMMAND ${CMAKE_C_COMPILER} -m32
          -c ${CMAKE_CURRENT_BINARY_DIR}/qemuinc.c
          -o ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.o
  COMMAND ${CMAKE_C_COMPILER} -m32
          -T ${CMAKE_CURRENT_SOURCE_DIR}/xuk64.ld
          -Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles
          -o ${qkernel_file}
          ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.o
  VERBATIM
  )

arch/x86_64/core/Makefile.xuk

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Any Linux host toolchain should work as a default.
#
# make predefines CC (as "cc"), so "CC ?= gcc" would never take effect.
# Replace the value only while it still has make's built-in origin; a CC
# given on the command line or in the environment is left untouched.
ifeq ($(origin CC),default)
CC = gcc
endif
OBJCOPY ?= objcopy
QEMU ?= qemu-system-x86_64
5+
6+
# No unwind tables is just to save size. No SSE is allowed because GCC
7+
# uses it for miscellaneous optimizations that aren't related to
8+
# floating point, and we don't want to take the traps except on
9+
# threads that definitely need it. No red zone because it's
10+
# incompatible with traditional stack-based interrupt entry.
11+
CFLAGS = -Os -I../include -std=c11 -ffreestanding -fno-pic -fno-asynchronous-unwind-tables -mno-sse -mno-red-zone
12+
13+
LDFLAGS = -Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles
14+
15+
# This works great. But note that distros ship no libgcc for the
16+
# target, so once we start to need stuff from that we'll need to move
17+
# to a custom cross compiler.
18+
ARCHFLAGS = -mx32
19+
20+
# The default build target just links the stub files.  Broader OS
# builds just care about these files.  The xuk.elf target is a
# demonstration kernel.
#
# "stubs" names no file, so it is declared phony; a stray file called
# "stubs" can then never make the target look up to date.
.PHONY: stubs
stubs: xuk-stub32.bin xuk-stub16.bin
24+
25+
# The 32 bit stub sits at the front of the image and is built first:
# compile, link standalone with its own linker script, then flatten the
# result to a raw binary.
xuk-stub32.bin: xuk-stub32.c *.h xuk-stub32.ld
	$(CC) -Wall -m32 $(CFLAGS) -c $<
	$(CC) -m32 -T xuk-stub32.ld $(LDFLAGS) -o stub32.elf $(CFLAGS) xuk-stub32.o
	$(OBJCOPY) -O binary stub32.elf xuk-stub32.bin
31+
32+
# Main OS image: begins with the 32 bit stub and holds all of the 64 bit
# code.  Each C file is compiled separately, then everything is linked
# with xuk64.ld.
xuk.elf64: xuk-stub32.bin xuk-stub16.bin xuk.c xuk-stubs.c demo-kernel.c *.h xuk64.ld
	$(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c xuk.c
	$(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c xuk-stubs.c
	$(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c demo-kernel.c
	$(CC) $(ARCHFLAGS) -T xuk64.ld $(LDFLAGS) -o xuk.elf64 $(CFLAGS) xuk.o xuk-stubs.o demo-kernel.o
39+
40+
# Last step.  The x86_64 ELF produced above is not a valid multiboot
# image, because the entry point is 32 bit code and the image has to be
# an i386 one.  So the segment is dumped to a raw binary, wrapped in a
# 32 bit object through .incbin, and that blob is linked one more time.
xuk.elf: xuk.elf64 xuk64.ld
	$(OBJCOPY) -O binary xuk.elf64 xuk.bin
	echo '.incbin "xuk.bin"' | as --32 -c - -o xuk32.o
	$(CC) -m32 -T xuk64.ld $(LDFLAGS) -o xuk.elf $(CFLAGS) xuk32.o
48+
49+
# The bootloader hands over a machine that is already in 386 protected
# mode, but SMP cores come up in real mode, so they need a tiny bootstrap
# layer of 16 bit code.  Only the .text section is kept in the binary.
xuk-stub16.bin: xuk-stub16.c
	$(CC) -m16 $(CFLAGS) -c xuk-stub16.c
	$(OBJCOPY) -O binary -j .text xuk-stub16.o xuk-stub16.bin
55+
56+
# Boot the demo kernel in QEMU with two CPUs.  "run" names no file, so it
# is declared phony and always executes.
.PHONY: run
run: xuk.elf
	$(QEMU) -serial mon:stdio -smp cpus=2 -icount shift=1 -no-reboot -no-shutdown -d unimp,pcall,guest_errors -kernel $<
58+
59+
# Remove all build products.  Declared phony so that a file named "clean"
# cannot stop the recipe from running.
.PHONY: clean
clean:
	rm -f *.elf *.elf64 *.o *~ *.bin *.disasm

arch/x86_64/core/demo-kernel.c

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
/*
2+
* Copyright (c) 2018 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
#include "serial.h"
7+
#include "vgacon.h"
8+
#include "printf.h"
9+
#include "xuk.h"
10+
11+
/* Tiny demonstration of the core64 code. Implements enough of an
12+
* "OS" layer to do some simple unit testing.
13+
*/
14+
15+
/* Output hook for the demo: each character goes to the serial port
 * first and then to the VGA console.
 */
static void putchar(int c)
{
	serial_putc(c);
	vgacon_putc(c);
}
20+
21+
void test_timers(void)
22+
{
23+
/* Quickly calibrate the timers against each other. Note that
24+
* the APIC is counting DOWN instead of up! Seems like on
25+
* qemu, the APIC base frequency is 3.7x slower than the tsc.
26+
* Looking at source, it seems like APIC is uniformly shifted
27+
* down from a nominal 1Ghz reference
28+
* (i.e. qemu_get_time_ns()), where the TSC is based on
29+
* cpu_get_ticks() and thus pulls in wall clock time & such.
30+
* If you specify "-icount shift=1", then they synchronize
31+
* properly.
32+
*/
33+
int tsc0, apic0, tsc1, apic1;
34+
35+
__asm__ volatile("rdtsc" : "=a"(tsc0) : : "rdx");
36+
apic0 = _apic.CURR_COUNT;
37+
do {
38+
/* Qemu misbehaves if I spam these registers. */
39+
for (int i = 0; i < 1000; i++) {
40+
__asm__ volatile("nop");
41+
}
42+
43+
__asm__ volatile("rdtsc" : "=a"(tsc1) : : "rdx");
44+
apic1 = _apic.CURR_COUNT;
45+
} while ((tsc1 - tsc0) < 10000 || (apic0 - apic1) < 10000);
46+
printf("tsc %d apic %d\n", tsc1 - tsc0, apic0 - apic1);
47+
}
48+
49+
/* Obtains a page from alloc_page(0) and returns the address 4096 bytes
 * past its start (presumably the initial top of stack for the CPU --
 * confirm against the caller).  The cpu argument is not used.
 */
unsigned int _init_cpu_stack(int cpu)
{
	long page = (long)alloc_page(0);

	return page + 4096;
}
53+
54+
/* Timer ISR: reports which CPU it ran on.  The F pointer holds the CPU
 * number, as stored by _cpu_start().  Neither argument is used.
 */
void handler_timer(void *arg, int err)
{
	int cpu = (int)(long)xuk_get_f_ptr();

	printf("Timer expired on CPU%d\n", cpu);
}
58+
59+
/* ISR for the software interrupt test: prints the CPU number and the
 * registered argument, then raises INT 0xff from inside the handler.
 * err is not used.
 */
void handler_f3(void *arg, int err)
{
	int cpu = (int)(long)xuk_get_f_ptr();
	int cookie = (int)(long)arg;

	printf("f3 handler on cpu%d arg %x, triggering INT 0xff\n",
	       cpu, cookie);
	__asm__ volatile("int $0xff");
	printf("end f3 handler\n");
}
66+
67+
void _unhandled_vector(int vector, int err, struct xuk_entry_frame *f)
68+
{
69+
(void)f;
70+
_putchar = putchar;
71+
printf("Unhandled vector %d (err %xh) on CPU%d\n",
72+
vector, err, (int)(long)xuk_get_f_ptr());
73+
}
74+
75+
/* ISR entry hook; the demo kernel has nothing to do here. */
void _isr_entry(void)
{
}
78+
79+
/* ISR exit hook, used here as a somewhat hacky test of both exit modes.
 * "Stay on this stack" can be expressed two ways: returning the
 * interrupted handle does the full spill and restore, while returning
 * NULL takes the shortcut.  The low bit of the TSC picks between them.
 */
void *_isr_exit_restore_stack(void *interrupted)
{
	return (rdtsc() & 1) ? interrupted : (void *)0;
}
92+
93+
void *switch_back_to;
94+
95+
void switch_back(int arg1, int arg2, int arg3)
96+
{
97+
printf("Switching back (%d, %d, %d) sbt %xh\n",
98+
arg1, arg2, arg3, (int)(long)switch_back_to);
99+
xuk_switch(switch_back_to, &switch_back_to);
100+
}
101+
102+
void test_switch(void)
103+
{
104+
static unsigned long long stack[256];
105+
long args[] = { 5, 4, 3 };
106+
int eflags = 0x20; /* interrupts disabled */
107+
108+
long handle = xuk_setup_stack((long)(sizeof(stack) + (char *)stack),
109+
switch_back, eflags, args, 3);
110+
111+
printf("Switching to %xh (stack %xh)\n",
112+
(int)handle, (int)(long)&stack[0]);
113+
__asm__ volatile("cli");
114+
xuk_switch((void *)handle, &switch_back_to);
115+
__asm__ volatile("sti");
116+
printf("Back from switch\n");
117+
}
118+
119+
/* ISR for the self-directed IPI test: reports the CPU it ran on.
 * Neither argument is used.
 */
void local_ipi_handler(void *arg, int err)
{
	int cpu = (int)(long)xuk_get_f_ptr();

	printf("local IPI handler on CPU%d\n", cpu);
}
123+
124+
/* Sends an IPI to the current CPU and validates it ran */
125+
void test_local_ipi(void)
126+
{
127+
printf("Testing a local IPI on CPU%d\n", (int)(long)xuk_get_f_ptr());
128+
129+
_apic.ICR_HI = (struct apic_icr_hi) {};
130+
_apic.ICR_LO = (struct apic_icr_lo) {
131+
.delivery_mode = FIXED,
132+
.vector = 0x90,
133+
.shorthand = SELF,
134+
};
135+
}
136+
137+
void _cpu_start(int cpu)
138+
{
139+
_putchar = putchar;
140+
printf("Entering demo kernel\n");
141+
142+
/* Make sure the FS/GS pointers work, then set F to store our
143+
* CPU ID
144+
*/
145+
xuk_set_f_ptr(cpu, (void *)(long)(0x19283700 + cpu));
146+
xuk_set_g_ptr(cpu, (void *)(long)(0xabacad00 + cpu));
147+
printf("fptr %p gptr %p\n", xuk_get_f_ptr(), xuk_get_g_ptr());
148+
149+
xuk_set_f_ptr(cpu, (void *)(long)cpu);
150+
151+
/* Set up this CPU's timer */
152+
/* FIXME: this sets up a separate vector for every CPU's
153+
* timer, and we'll run out. They should share the vector but
154+
* still have individually-set APIC config. Probably wants a
155+
* "timer" API
156+
*/
157+
xuk_set_isr(INT_APIC_LVT_TIMER, 10, handler_timer, 0);
158+
_apic.INIT_COUNT = 5000000;
159+
test_timers();
160+
161+
if (cpu == 0) {
162+
xuk_set_isr(0x1f3, 0, (void *)handler_f3, (void *)0x12345678);
163+
}
164+
165+
__asm__ volatile("int $0xf3");
166+
167+
/* Fire it all up */
168+
printf("Enabling Interrupts\n");
169+
__asm__ volatile("sti");
170+
printf("Interrupts are unmasked (eflags %xh), here we go...\n",
171+
eflags());
172+
173+
/* Wait a teeny bit then send an IPI to CPU0, which will hit
174+
* the unhandled_vector handler
175+
*/
176+
if (cpu == 1) {
177+
int t0 = rdtsc();
178+
179+
while (rdtsc() - t0 < 1000000) {
180+
}
181+
182+
_apic.ICR_HI = (struct apic_icr_hi) {
183+
.destination = 0
184+
};
185+
_apic.ICR_LO = (struct apic_icr_lo) {
186+
.delivery_mode = FIXED,
187+
.vector = 66,
188+
};
189+
while (_apic.ICR_LO.send_pending) {
190+
}
191+
}
192+
193+
test_switch();
194+
195+
xuk_set_isr(XUK_INT_RAW_VECTOR(0x90), -1, local_ipi_handler, 0);
196+
test_local_ipi();
197+
198+
printf("CPU%d initialized, sleeping\n", cpu);
199+
while (1) {
200+
__asm__ volatile("hlt");
201+
}
202+
}

arch/x86_64/core/offsets/offsets.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/*
2+
* Copyright (c) 2018 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/

0 commit comments

Comments
 (0)