Hi guys !

I'm currently reading "Programming Ground Up" by Jonathan Bartlett
(Amazon, Book's page, Download Book)

The program toupper.s in Chapter 5 frustrates me to some extent :-)

It's working fine, no problem. But there's a certain part I just don't understand.


How does the computer know where to continue from after the "jmp read_loop_begin"?

Why doesn't the computer always read the same first 500 bytes into the buffer?

How does it know where it has to continue?

After the "jmp read_loop_begin" it starts again at

movl $SYS_READ, %eax
movl ST_FD_IN(%ebp), %ebx
movl $BUFFER_DATA, %ecx
movl $BUFFER_SIZE, %edx
int $LINUX_SYSCALL

that's exactly the same information he got in the first place.

So suppose I have a file containing 700 bytes.
How does it know to start reading at byte No. 501 in the second run, and not just read the first 500 bytes again?

Where's the extra information that tells it that it has already read the first 500 bytes and has to read the next 500 bytes?


It's not a bug. I tried running the program with a buffer size of 1 (.equ BUFFER_SIZE, 1) and it still worked fine :/

SOURCE CODE

####################

#PURPOSE:     This program converts an input file
#             to an output file with all letters
#             converted to uppercase.
#
#PROCESSING:  1) Open the input file
#             2) Open the output file
#             3) While we're not at the end of the input file
#                a) read part of file into our memory buffer
#                b) go through each byte of memory
#                     if the byte is a lower-case letter,
#                     convert it to uppercase
#                c) write the memory buffer to output file


  .section .data


#######CONSTANTS########
  #Linux i386 system call numbers; the number goes in %eax
  #before "int $LINUX_SYSCALL"
  .equ SYS_OPEN, 5
  .equ SYS_WRITE, 4
  .equ SYS_READ, 3
  .equ SYS_CLOSE, 6
  .equ SYS_EXIT, 1


  #options for open (look at
  #/usr/include/asm/fcntl.h for
  #various values. You can combine them
  #by adding them or ORing them)
  #This is discussed at greater length
  #in "Counting Like a Computer"
  #
  #The values are written in octal (leading 0).

  .equ O_RDONLY, 0

  #O_WRONLY (01) | O_CREAT (0100) | O_TRUNC (01000).
  #The earlier value 03101 additionally set O_APPEND (02000),
  #which the constant's name does not advertise. Because the
  #file is truncated and then written sequentially, dropping
  #O_APPEND does not change the output.
  .equ O_CREAT_WRONLY_TRUNC, 01101

 #standard file descriptors

 .equ STDIN, 0
 .equ STDOUT, 1
 .equ STDERR, 2

 #system call interrupt

 .equ LINUX_SYSCALL, 0x80
 .equ END_OF_FILE, 0         #This is the return value
                             #of read which means we've
                             #hit the end of the file

 #Number of command-line arguments the program expects
 #(input file name and output file name), not counting
 #the program name itself
 .equ NUMBER_ARGUMENTS, 2

.section .bss

 #Transfer buffer: each chunk of the input file is read
 #into BUFFER_DATA, converted in place, and then written
 #to the output file from the same memory.
 #BUFFER_SIZE is the chunk size in bytes. The original
 #author advises never letting it exceed 16,000.

 .set BUFFER_SIZE, 500
 .lcomm BUFFER_DATA, BUFFER_SIZE


 .section .text
 #STACK POSITIONS
 #All offsets are relative to %ebp, which _start sets to the
 #value %esp had on entry (so 0(%ebp) is argc).
 .equ ST_SIZE_RESERVE, 8     #Bytes reserved for the two descriptors
 .equ ST_FD_IN, -4           #Input file descriptor
 .equ ST_FD_OUT, -8          #Output file descriptor
 .equ ST_ARGC, 0             #Number of arguments
 .equ ST_ARGV_0, 4           #Name of program
 .equ ST_ARGV_1, 8           #Input file name
 .equ ST_ARGV_2, 12          #Output file name

 #Process exit statuses
 .equ EXIT_OK, 0
 .equ EXIT_FAILURE, 1

 .globl _start

#PURPOSE:  Program entry point. Copies the file named by the
#          first argument to the file named by the second
#          argument, converting lower-case letters to upper
#          case one buffer at a time.
#
#EXIT:     EXIT_OK on success; EXIT_FAILURE if too few
#          arguments were given, a file could not be opened,
#          or a read or write failed.
#
#NOTES:    %edi carries the exit status once the main loop is
#          over. It is only set after the last call to
#          convert_to_upper, so it does not matter whether
#          that function preserves it.

_start:
    ###INITIALIZE PROGRAM###
    #save the stack pointer
    movl %esp, %ebp

    #Allocate space for our file descriptors
    #on the stack
    subl $ST_SIZE_RESERVE, %esp

check_arguments:
    #argc counts the program name too, so we need strictly
    #more than NUMBER_ARGUMENTS. Without this check a missing
    #output name would make ST_ARGV_2 pick up whatever follows
    #the argument list on the stack.
    cmpl $NUMBER_ARGUMENTS, ST_ARGC(%ebp)
    jle  exit_failure

open_files:
open_fd_in:
    ###OPEN INPUT FILE###
    movl $SYS_OPEN, %eax
    movl ST_ARGV_1(%ebp), %ebx      #input filename
    movl $O_RDONLY, %ecx            #read-only flag
    movl $0666, %edx                #mode; irrelevant when reading
    int  $LINUX_SYSCALL

    #a negative return value is an error code, not a descriptor
    testl %eax, %eax
    js   exit_failure

store_fd_in:
    #save the given file descriptor
    movl %eax, ST_FD_IN(%ebp)

open_fd_out:
    ###OPEN OUTPUT FILE###
    movl $SYS_OPEN, %eax
    movl ST_ARGV_2(%ebp), %ebx      #output filename
    movl $O_CREAT_WRONLY_TRUNC, %ecx
    movl $0666, %edx                #mode for new file (if it's created)
    int  $LINUX_SYSCALL

    #on failure the input file is already open and must be closed
    testl %eax, %eax
    js   open_out_failed

store_fd_out:
    #store the file descriptor here
    movl %eax, ST_FD_OUT(%ebp)

    ###BEGIN MAIN LOOP###
read_loop_begin:
    ###READ IN A BLOCK FROM THE INPUT FILE###
    #The kernel keeps a file position for the descriptor and
    #advances it by the number of bytes read, so repeating
    #this identical call returns the next chunk each time.
    movl $SYS_READ, %eax
    movl ST_FD_IN(%ebp), %ebx       #input file descriptor
    movl $BUFFER_DATA, %ecx         #the location to read into
    movl $BUFFER_SIZE, %edx         #the size of the buffer
    int  $LINUX_SYSCALL             #bytes read is returned in %eax

    ###EXIT IF WE'VE REACHED THE END###
    cmpl $END_OF_FILE, %eax
    je   end_loop                   #0 bytes: end of file
    jl   io_failure                 #negative: read error

continue_read_loop:
    ###CONVERT THE BLOCK TO UPPER CASE###
    pushl $BUFFER_DATA              #location of buffer
    pushl %eax                      #number of bytes actually read
    call convert_to_upper
    popl %eax                       #get the size back
    addl $4, %esp                   #discard the buffer argument

    ###WRITE THE BLOCK OUT TO THE OUTPUT FILE###
    #write may accept fewer bytes than requested, so keep
    #writing the remainder until the whole block is out.
    #int $0x80 changes only %eax, so %ebx/%ecx/%edx stay
    #valid across iterations.
    movl %eax, %edx                 #bytes still to write
    movl ST_FD_OUT(%ebp), %ebx      #file to use
    movl $BUFFER_DATA, %ecx         #next byte to write
write_loop:
    movl $SYS_WRITE, %eax
    int  $LINUX_SYSCALL
    cmpl $0, %eax
    jle  io_failure                 #error, or no progress at all
    addl %eax, %ecx                 #skip what was written
    subl %eax, %edx
    jnz  write_loop

    ###CONTINUE THE LOOP###
    jmp  read_loop_begin

end_loop:
    movl $EXIT_OK, %edi
    jmp  close_files

io_failure:
    movl $EXIT_FAILURE, %edi

close_files:
    ###CLOSE THE FILES###
    #NOTE - we don't need to do error checking
    # on these, because error conditions
    # don't signify anything special here
    movl $SYS_CLOSE, %eax
    movl ST_FD_OUT(%ebp), %ebx
    int  $LINUX_SYSCALL
close_fd_in:
    movl $SYS_CLOSE, %eax
    movl ST_FD_IN(%ebp), %ebx
    int  $LINUX_SYSCALL

exit_program:
    ###EXIT###
    movl $SYS_EXIT, %eax
    movl %edi, %ebx                 #exit status chosen above
    int  $LINUX_SYSCALL

open_out_failed:
    #only the input file is open at this point
    movl $EXIT_FAILURE, %edi
    jmp  close_fd_in

exit_failure:
    #nothing has been opened yet
    movl $EXIT_FAILURE, %edi
    jmp  exit_program

#PURPOSE:    This function actually does the
#            conversion to upper case for a block
#
#INPUT:      Both parameters are passed on the stack.
#            The first parameter (pushed first, found at
#            ST_BUFFER(%ebp)) is the location of the block
#            of memory to convert.
#            The second parameter (pushed last, found at
#            ST_BUFFER_LEN(%ebp)) is the length of that
#            buffer in bytes.
#
#OUTPUT:     This function overwrites the current
#            buffer with the upper-casified version.
#            There is no return value. The caller's stack
#            arguments are left unmodified.
#
#VARIABLES:
#            %eax - beginning of buffer
#            %ebx - length of buffer
#            %edi - current buffer offset
#            %cl - current byte being examined
#                  (first part of %ecx)
#
#CLOBBERS:   %eax, %ecx and the flags.
#            %ebx and %edi are saved on entry and restored
#            before returning, as the i386 calling
#            convention expects of a callee.
#
  ###CONSTANTS##

  #The lower boundary of our search
  .equ LOWERCASE_A, 97 #i.e. ASCII code for 'a'

  #The upper boundary of our search
  .equ LOWERCASE_Z, 122 #i.e. ASCII code for 'z'

  #Conversion between upper and lower case
  .equ UPPER_CONVERSION, -32 #i.e. 'A' - 'a' (65 - 97)

  ###STACK STUFF###
  #Offsets from %ebp after the prologue: 0 is the saved
  #%ebp and 4 is the return address.
  .equ ST_BUFFER_LEN, 8 #Length of buffer
  .equ ST_BUFFER, 12      #actual buffer

convert_to_upper:
  pushl %ebp
  movl %esp, %ebp

  #preserve the callee-saved registers used as locals;
  #pushed after %ebp is set, so the ST_* offsets still hold
  pushl %ebx
  pushl %edi

  ###SET UP VARIABLES###
  movl ST_BUFFER(%ebp), %eax
  movl ST_BUFFER_LEN(%ebp), %ebx
  movl $0, %edi

  #if a buffer with zero length was given
  #to us, just leave
  cmpl $0, %ebx
  je   end_convert_loop

convert_loop:
  #get the current byte
  movb (%eax,%edi,1), %cl

  #go to the next byte unless it is between
  #'a' and 'z'. The comparisons are signed, so bytes
  #of 0x80 and above count as negative and are skipped
  #by the first test.
  cmpb $LOWERCASE_A, %cl
  jl   next_byte
  cmpb $LOWERCASE_Z, %cl
  jg   next_byte

  #otherwise convert the byte to uppercase
  addb $UPPER_CONVERSION, %cl

  #and store it back
  movb %cl, (%eax,%edi,1)
next_byte:
  incl %edi                    #next byte
  cmpl %edi, %ebx              #continue unless
                               #we've reached the
                               #end
  jne  convert_loop
end_convert_loop:

  #restore the saved registers in reverse order
  popl %edi
  popl %ebx

  #no return value, just leave
  movl %ebp, %esp
  popl %ebp
  ret


#######################

hi,

first i have to say that i don't know this assembler ( i guess it is for Linux )
but the lines

#the size of the buffer
movl $BUFFER_SIZE, %edx

probably mean that the read function only reads the next 500 bytes.
The read pointer then automatically points to the next position, so in the second run the next 500 bytes will be read, or the end of the file will be reached...

hi,

first i have to say that i don't know this assembler ( i guess it is for Linux )
but the lines

#the size of the buffer
movl $BUFFER_SIZE, %edx

probably mean that the read function only reads the next 500 bytes.
The read pointer then automatically points to the next position, so in the second run the next 500 bytes will be read, or the end of the file will be reached...

Yes. That's what I wanted to know :-)

Here's another answer, quoted from the book's mailing list:

If I understand you correctly, you are asking why the same N (buffer size) bytes of the file aren't read over and over again, because there is no explicit "move the file position pointer forward" in the program.

This is handled "under the covers" by the Linux system call "read."

.equ SYS_READ, 3

From the documentation:

read() attempts to read up to count bytes from file descriptor fd into the buffer starting at buf.

" On success, the number of bytes read is returned (zero indicates end of file), and the file position is advanced by this number."


http://linux.about.com/od/commands/l/blcmdl2_read.htm

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.