;-----------------------------------------------------------------------
; Boot-sector loader (NASM / Intel syntax, 16-bit real mode, flat binary).
;
; Assembled with vstart=0x7c00 and addressed through CS, so it assumes the
; BIOS enters it with CS = 0 (i.e. at 0x0000:0x7c00).
;
; What it does:
;   1. Reads the user program from the ATA disk behind ports 0x1f0-0x1f7
;      (PIO, LBA28), starting at sector USER_PROGRAM_LBA, into the physical
;      address stored in `physic_base`.
;   2. Converts the program-relative addresses in the program header into
;      real-mode segment values.
;   3. Far-jumps to the program's entry point.
;
; User program header layout, as consumed by this loader:
;   0x00  dd  program length in bytes
;   0x04  dw  entry point offset
;   0x06  dd  entry point segment, given as a program-relative address
;             (its low word is overwritten with the resulting segment)
;   0x0a  dw  number of relocation table entries
;   0x0c  dd  relocation table: one program-relative address per entry
;             (the low word of each is overwritten with the segment)
;-----------------------------------------------------------------------

SECTOR_SIZE             equ 512
SECTOR_PARAGRAPHS       equ SECTOR_SIZE / 16    ; segment step per sector
WORDS_PER_SECTOR        equ SECTOR_SIZE / 2
USER_PROGRAM_LBA        equ 1                   ; first sector of the user program

; ATA PIO registers
ATA_DATA                equ 0x1f0
ATA_SECTOR_COUNT        equ 0x1f2               ; 0x1f3..0x1f7 follow via inc dx
ATA_DRIVE_LBA           equ 0xe0                ; value for 0x1f6: LBA mode, LBA 27..24 = 0
ATA_CMD_READ            equ 0x20                ; value for 0x1f7: read sectors
ATA_STATUS_MASK         equ 0x88                ; BSY | DRQ
ATA_STATUS_READY        equ 0x08                ; BSY clear, DRQ set

; User program header offsets
HDR_LENGTH              equ 0x00
HDR_ENTRY               equ 0x04
HDR_ENTRY_SEG           equ 0x06
HDR_REALLOC_COUNT       equ 0x0a
HDR_REALLOC_TABLE       equ 0x0c

VIDEO_TEXT_SEGMENT      equ 0xb800

section .text align=16 vstart=0x7c00

        jmp     start

start:
        ; Stack at 0x0000:0x0000; the first push wraps SP to 0xfffe.
        xor     ax, ax
        mov     ss, ax
        mov     sp, ax

        ; DS = ES = physic_base / 16 (32-bit dividend DX:AX, 16-bit divisor).
        mov     ax, [cs:physic_base]            ; low 16 bits
        mov     dx, [cs:physic_base + 2]        ; high 16 bits
        mov     bx, 16
        div     bx
        mov     ds, ax                          ; quotient = load segment
        mov     es, ax

        ; Read the first sector of the user program to DS:0 so that the
        ; header (and with it the program length) becomes available.
        mov     di, USER_PROGRAM_LBA
        mov     si, 1                           ; always one sector per call
        xor     bx, bx
        call    read_sector

        ; AX = number of sectors still to read = ceil(length / 512) - 1.
        mov     dx, [ds:HDR_LENGTH + 2]         ; high 16 bits
        mov     ax, [ds:HDR_LENGTH]             ; low 16 bits
        mov     bx, SECTOR_SIZE
        div     bx
        test    dx, dx
        jnz     .count_ready                    ; partial last sector: AX already excludes the first
        test    ax, ax
        jz      user_program_loaded             ; length 0: nothing more to read, avoid underflow
        dec     ax                              ; exact multiple: first sector is already read
.count_ready:
        test    ax, ax
        jz      user_program_loaded

        push    ds
        mov     cx, ax
.read_rest:
        ; Advance the destination by one sector by bumping the segment, so
        ; the offset never overflows regardless of the program size.
        mov     ax, ds
        add     ax, SECTOR_PARAGRAPHS
        mov     ds, ax
        xor     bx, bx
        inc     di                              ; next LBA (the count in SI stays 1)
        call    read_sector
        loop    .read_rest
        pop     ds

user_program_loaded:
        ; Convert the entry point's program-relative address to a segment.
        mov     dx, [HDR_ENTRY_SEG + 2]
        mov     ax, [HDR_ENTRY_SEG]
        call    physic_to_logic
        mov     [HDR_ENTRY_SEG], ax

        ; Convert every relocation table entry the same way.
        ; NOTE: the user program must place its relocation count at 0x0a.
        mov     cx, [HDR_REALLOC_COUNT]
        jcxz    enter_user_program              ; empty table: `loop` would run 65536 times
        mov     bx, HDR_REALLOC_TABLE
realloc:
        mov     dx, [bx + 2]                    ; high 16 bits
        mov     ax, [bx]                        ; low 16 bits
        call    physic_to_logic
        mov     [bx], ax
        add     bx, 4
        loop    realloc

enter_user_program:
        ;jmp show_msg
        jmp     far [ds:HDR_ENTRY]              ; offset at 0x04, segment at 0x06

;-----------------------------------------------------------------------
; physic_to_logic
; Converts a program-relative 32-bit address into a real-mode segment.
; In:    DX:AX = address relative to the start of the user program
; Out:   AX    = ((DX:AX + physic_base) >> 4) & 0xffff
;                (only bits 19..4 of the sum are kept)
;        DX preserved
; Clobb: flags
;-----------------------------------------------------------------------
physic_to_logic:
        push    dx
        add     ax, [cs:physic_base]
        adc     dx, [cs:physic_base + 2]
        shr     ax, 4                           ; bits 15..4 of the sum -> AX bits 11..0
        ror     dx, 4                           ; bits 19..16 of the sum -> DX bits 15..12
        and     dx, 0xf000
        or      ax, dx
        pop     dx
        ret

;-----------------------------------------------------------------------
; read_sector
; Reads one sector from the ATA disk (PIO, LBA28 addressing).
; In:    DI    = LBA of the sector to read (bits 15..0; LBA 27..16 are 0)
;        SI    = sector count written to the device (low byte only).
;                Exactly one sector (256 words) is transferred per call,
;                so callers must pass SI = 1.
;        DS:BX = destination buffer
; Out:   512 bytes stored at DS:BX
;        AX, BX, CX, DX preserved
; Clobb: flags
;
; Example (read sectors 1 and 2 to physical 0x10000):
;        mov     ax, 0x1000
;        mov     ds, ax
;        xor     bx, bx
;        mov     di, 1
;        mov     si, 1
;        call    read_sector
;        mov     bx, 512
;        inc     di
;        call    read_sector
;-----------------------------------------------------------------------
read_sector:
        push    ax
        push    bx
        push    dx
        push    cx

        mov     dx, ATA_SECTOR_COUNT            ; 0x1f2: sector count
        mov     ax, si
        out     dx, al

        inc     dx                              ; 0x1f3: LBA 7..0
        mov     ax, di
        out     dx, al

        inc     dx                              ; 0x1f4: LBA 15..8
        mov     al, ah
        out     dx, al

        inc     dx                              ; 0x1f5: LBA 23..16 = 0
        xor     al, al
        out     dx, al

        inc     dx                              ; 0x1f6: LBA mode, LBA 27..24 = 0
        mov     al, ATA_DRIVE_LBA
        out     dx, al

        inc     dx                              ; 0x1f7: command port
        mov     al, ATA_CMD_READ
        out     dx, al

.wait_ready:
        in      al, dx                          ; 0x1f7 doubles as the status port
        and     al, ATA_STATUS_MASK
        cmp     al, ATA_STATUS_READY
        jnz     .wait_ready                     ; still busy or no data yet

        ; One sector = 512 bytes = 256 word-sized reads from the data port.
        mov     cx, WORDS_PER_SECTOR
        mov     dx, ATA_DATA
.read_word:
        in      ax, dx
        mov     [bx], ax
        add     bx, 2
        loop    .read_word

        pop     cx
        pop     dx
        pop     bx
        pop     ax
        ret

;-----------------------------------------------------------------------
; show_msg
; Debug helper (only reachable via the commented-out jump above): writes
; "test" with attribute 0x07 to the first four character cells at segment
; 0xb800, then hangs. Does not return. Clobbers AX and ES.
;-----------------------------------------------------------------------
show_msg:
        mov     ax, VIDEO_TEXT_SEGMENT
        mov     es, ax
        mov     byte [es:0x0], 't'
        mov     byte [es:0x1], 0x07
        mov     byte [es:0x2], 'e'
        mov     byte [es:0x3], 0x07
        mov     byte [es:0x4], 's'
        mov     byte [es:0x5], 0x07
        mov     byte [es:0x6], 't'
        mov     byte [es:0x7], 0x07
        jmp     $

; Physical address at which the user program is loaded.
physic_base     dd 0x10000

; Pad to 510 bytes and append the boot signature.
        times 510-($-$$) db 0
        db 0x55, 0xaa