Assembly String
## Assembly Language - String Manipulation
String manipulation is a common requirement in programming.
Handling strings in assembly means directly manipulating byte sequences in memory, which is more low-level but also more flexible.
* * *
## String Definition and Storage
In NASM, a string is essentially a byte sequence and can be defined using the DB pseudo-instruction:
## Example
; String definition methods (NASM syntax)
; In NASM a string is just a sequence of bytes emitted by DB.
section .data

; Method 1: Standard string, C-style (null-terminated)
str1        db  'Hello, tutorial!', 0
; Compile-time length of str1. `$` is the current assembly position, so
; this EQU must come directly after str1; placed any later it would also
; count every byte defined in between. Includes the terminating null.
str1_len    equ $ - str1

; Method 2: With newline characters (0xA = line feed); not null-terminated
str2        db  'Line1', 0xA, 'Line2', 0xA

; Method 3: Backquoted strings process C-style escape sequences,
; so \n below is assembled as a single 0xA byte
str3        db  `hello\nworld\n`, 0

; Method 4: Define character by character
str4        db  'A', 'B', 'C', 'D', 0

; Method 5: Use dup to generate repeated characters (40 dashes).
; NOTE: DUP inside DB needs a reasonably recent NASM (added in 2.15);
; on older versions write:  border times 40 db '-'
border      db  40 dup ('-')
* * *
## Calculating String Length
There are two ways to get string length: compile-time calculation (EQU) and runtime calculation (traverse to find null):
## Example
; File path: strlen_demo.asm
; Two ways to calculate string length
; Target: 32-bit Linux, NASM syntax, system calls via int 0x80
;   eax = syscall number, ebx/ecx/edx = arguments 1..3

SYS_EXIT    equ 1
SYS_WRITE   equ 4
STDOUT      equ 1

section .data
; Method A: Compile-time calculation (for constant strings)
msg1        db  'Hello, TUTORIAL!', 0xA
msg1_len    equ $ - msg1            ; Calculated by the assembler

; Method B: Null-terminated string (runtime calculation)
msg2        db  'Find my length', 0 ; Ends with 0

section .text
global _start

_start:
        ; Method A: write(STDOUT, msg1, msg1_len) using the constant length
        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        mov     ecx, msg1
        mov     edx, msg1_len
        int     0x80

        ; Method B: count bytes until the null terminator
        mov     esi, msg2               ; esi = cursor into the string
        xor     ecx, ecx                ; ecx = bytes counted so far

strlen_loop:
        cmp     byte [esi], 0           ; reached the terminator?
        je      strlen_done
        inc     ecx                     ; one more character
        inc     esi                     ; advance cursor
        jmp     strlen_loop

strlen_done:
        ; ecx = length of msg2, excluding the null.
        ; write() takes the length in edx and the buffer in ecx, so the
        ; count must be moved to edx BEFORE ecx is reloaded with the
        ; buffer address.
        mov     edx, ecx                ; edx = calculated length
        mov     ecx, msg2               ; ecx = buffer address
        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        int     0x80

        ; exit(0)
        mov     eax, SYS_EXIT
        xor     ebx, ebx
        int     0x80
* * *
## String Copying
Use a loop to copy byte by byte, or use x86's string operation instruction `MOVSB`:
## Example
; File path: strcpy_demo.asm
; Two implementations of string copying
; Target: 32-bit Linux, NASM syntax, system calls via int 0x80

SYS_EXIT    equ 1
SYS_WRITE   equ 4
STDOUT      equ 1

section .data
src             db  'tutorial source string', 0
src_len         equ $ - src         ; Bytes to copy, including the null
src_text_len    equ src_len - 1     ; Bytes to print, excluding the null

section .bss
dest_manual     resb 64             ; Manual copy destination
dest_fast       resb 64             ; Fast copy destination

section .text
global _start

_start:
        ; Method A: Manual byte-by-byte copy
        mov     esi, src                ; Source address
        mov     edi, dest_manual        ; Destination address
        mov     ecx, src_len            ; Bytes remaining (never 0 here)

copy_loop:
        mov     al, [esi]               ; Read one byte from the source
        mov     [edi], al               ; Write it to the destination
        inc     esi                     ; Source pointer++
        inc     edi                     ; Destination pointer++
        dec     ecx                     ; One byte fewer remaining
        jnz     copy_loop

        ; Method B: Use the string operation instruction
        cld                             ; DF=0: esi/edi move forward
        mov     esi, src                ; Source address (ESI)
        mov     edi, dest_fast          ; Destination address (EDI)
        mov     ecx, src_len            ; Number of bytes (ECX)
        rep movsb                       ; Repeat movsb ECX times
        ; rep movsb: while (ecx > 0) { [edi] = [esi]; esi++; edi++; ecx--; }

        ; Verify: output both copy results (text only, without the null)
        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        mov     ecx, dest_manual
        mov     edx, src_text_len
        int     0x80

        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        mov     ecx, dest_fast
        mov     edx, src_text_len
        int     0x80

        ; exit(0)
        mov     eax, SYS_EXIT
        xor     ebx, ebx
        int     0x80
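> `REP MOVSB` is the classic x86 string-copy idiom. Whether it beats a hand-written copy loop depends on the CPU and the copy size: on processors with fast-string support (ERMSB) it is usually competitive for medium and large copies, while for very short copies its startup overhead can make a simple or unrolled loop faster. Measure before choosing.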
* * *
## String Comparison
Use `CMPSB` with `REPE` (repeat until not equal) to compare byte by byte:
## Example
; File path: strcmp_demo.asm
; Compare two strings for equality
; Target: 32-bit Linux, NASM syntax, system calls via int 0x80

SYS_EXIT    equ 1
SYS_WRITE   equ 4
STDOUT      equ 1

section .data
str_a       db  'tutorial', 0
; Bytes to compare: all of str_a including its null terminator.
; Including the null means a longer or shorter second string always
; produces a mismatch at or before either terminator, so the comparison
; never reads past the end of either string.
str_a_len   equ $ - str_a
str_b       db  'tutorial', 0
str_c       db  'TUTORIAL', 0

eq_msg      db  'Strings are equal', 0xA
eq_len      equ $ - eq_msg
ne_msg      db  'Strings are NOT equal', 0xA
ne_len      equ $ - ne_msg

section .text
global _start

_start:
        ; Compare str_a and str_b (identical)
        cld                             ; Forward comparison
        mov     esi, str_a              ; First string
        mov     edi, str_b              ; Second string
        mov     ecx, str_a_len          ; Compare up to the null, inclusive
        repe cmpsb                      ; Repeat while equal and ecx > 0
        ; repe:  continue while ZF=1 (equal) and ecx > 0
        ; cmpsb: compare [esi] with [edi], then esi++, edi++
        je      strings_equal           ; ZF=1 here: every byte matched

        ; Not equal
        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        mov     ecx, ne_msg
        mov     edx, ne_len
        int     0x80
        jmp     compare_next

strings_equal:
        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        mov     ecx, eq_msg
        mov     edx, eq_len
        int     0x80

compare_next:
        ; Compare str_a and str_c (different case)
        cld
        mov     esi, str_a
        mov     edi, str_c
        mov     ecx, str_a_len
        repe cmpsb
        jne     not_equal_2             ; ZF=0: stopped on a mismatch

        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        mov     ecx, eq_msg
        mov     edx, eq_len
        int     0x80
        jmp     exit

not_equal_2:
        mov     eax, SYS_WRITE
        mov     ebx, STDOUT
        mov     ecx, ne_msg
        mov     edx, ne_len
        int     0x80

exit:
        ; exit(0)
        mov     eax, SYS_EXIT
        xor     ebx, ebx
        int     0x80
* * *
## String Operation Instructions Summary
| Instruction | Function | Registers Used |
| --- | --- | --- |
| MOVSB | Copy byte: [EDI] = [ESI] | ESI=source, EDI=destination, ECX=count, DF=direction |
| MOVSW | Copy word (2 bytes) | Same as above |
| MOVSD | Copy doubleword (4 bytes) | Same as above |
| STOSB | Store byte: [EDI] = AL | EDI=destination, AL=value, ECX=count |
| LODSB | Load byte: AL = [ESI] | ESI=source |
| CMPSB | Compare byte: [ESI] - [EDI] | ESI=source, EDI=destination, ECX=count |
| SCASB | Scan byte: AL - [EDI] | EDI=destination, AL=search value, ECX=count |
* * *
## Case Conversion Example
## Example
; File path: case_convert.asm
; Convert lowercase letters in string to uppercase
section.data
msg db 'Hello, tutorial! Welcome to Assembly.',0xA
len equ $-msg
section.text
global _start
_start:
; Output original string
mov eax,4
mov ebx,1
mov ecx, msg
mov edx, len
int 0x80
; Convert: traverse string, lowercase -> uppercase
mov esi, msg ; Point to string start
mov ecx, len ; Loop count
convert_loop:
mov al, ; Read character
cmp al, 'a'; Is it >= 'a'?
jb next_char ; Less than 'a', skip
cmp al, 'z'; Is it <= 'z'?
ja next_char ; Greater than 'z', skip
; Lowercase to uppercase: 'a'(
YouTip