Hello.

I am having a very hard time trying to encode a file and then decode it again so i can end up with the original file.

I want to transform the content of the original file into base 85, and perhaps into a base with 100 characters or more so that it works faster. I've tried and succeeded with the decimal, hexadecimal and base 64 bases.
Ex. When I have 'abcde' in a text file and I encode it as 97 98 99 100 101, and then decode it back into a different file, I get "abcde".

When I use the hex base, I have 'abcde' and encode it as 61 62 63 64 65; when I decode it, it gives me back abcde.

The table looks like this.


Base is the encoding mode.
Alphabet is the set of symbols the base uses. Example: the decimal alphabet has 10 "letters" (0 1 2 3 4 5 6 7 8 9) plus a space as separator, which makes 11.

PROPORTION MEANS HOW FAST THE ENCODING IS DONE.


HERE IS MY TABLE

http://img524.imageshack.us/img524/2229/daniwx6.jpg


Here is my code for base 64.
;---------------------------------------------------------
.386                                    ; immediate shift counts > 1 (shr ax, 4) need a 186+ target
assume cs:main_seg, ds:main_seg
;-----------------------------------------------------------------------
; encode64 - DOS .COM program: base-64 style file encoder
; Usage:  code [inputfile] [outputfile]
; Each group of 3 input bytes becomes 4 output characters; every
; character is a 6-bit value plus ALPHA_BASE (codes 49..112).
; NOTE: a final group of 1 or 2 bytes is padded with zero bytes and is
;       still written as 4 characters, so the encoded file does not
;       record the exact input length (a decoder that always emits
;       3 bytes per group produces up to two trailing zero bytes).
;       The format is kept as-is so existing encoded files stay valid.
;-----------------------------------------------------------------------
main_seg segment use16                  ; a .COM image runs as 16-bit code; say so explicitly after .386
org 100h

CR              equ 13                  ; terminator of the PSP command tail
SPACE           equ 20h
NAME_MAX        equ 30                  ; longest accepted file name (terminator not counted)
ALPHA_BASE      equ 49                  ; character code that represents the 6-bit value 0
SIX_BITS        equ 3fh                 ; mask for one 6-bit symbol
buffsize1       equ 3                   ; bytes consumed per group
buffsize2       equ 4                   ; characters produced per group

;-----------------------------variables-------------------
start:
        jmp     real_start
help_content    db "Syntex is : code [inputfile] [outputfile]",'$'
file_in         db NAME_MAX dup(0), 0   ; ASCIIZ input file name
file_out_en     db NAME_MAX dup(0), 0   ; ASCIIZ output file name

inp             db 3 dup(?),'$'         ; raw input group
oute            db 4 dup (?),'$'        ; encoded output group
inh1            dw ?                    ; input file handle
outh1           dw ?                    ; output file handle

;----------------------------program starts here------------------------------
real_start:
;------------ fetch both file names from the command tail (PSP:80h = length, PSP:81h.. = text)
        mov     si, 80h
        mov     bl, [si]                ; bl = number of characters in the tail
        xor     bh, bh
        inc     si                      ; si -> first tail character (81h)
        add     bx, si                  ; bx -> just past the last tail character

        mov     di, offset file_in
        call    get_name
        jc      syntax_error            ; no first argument (or it is too long)
        mov     di, offset file_out_en
        call    get_name
        jc      syntax_error            ; no second argument (or it is too long)

;------------ open the input file, read-only
        mov     ax, 3d00h               ; ah = 3dh open, al = 0 read access
        mov     dx, offset file_in
        int     21h
        jc      exit                    ; nothing is open yet, just leave
        mov     inh1, ax

;------------ create (or truncate) the output file
        mov     ah, 3ch
        xor     cx, cx                  ; normal file attributes
        mov     dx, offset file_out_en
        int     21h
        jc      close_in                ; only the input file is open at this point
        mov     outh1, ax

;------------ main loop: invariant - both handles are open
next_group:
        mov     ah, 3fh                 ; read up to 3 bytes
        mov     bx, inh1
        mov     cx, buffsize1
        mov     dx, offset inp
        int     21h
        jc      done                    ; read error: close what we have and stop
        test    ax, ax
        jz      done                    ; 0 bytes read = end of file

        ; pad a short final group with zero bytes (ax is 1 or 2 here)
        cmp     ax, buffsize1
        jae     encode_group
        mov     inp[2], 0
        cmp     ax, 2
        je      encode_group
        mov     inp[1], 0

encode_group:
        ; symbol 0 = top 6 bits of byte 0
        mov     al, inp[0]
        shr     al, 2
        add     al, ALPHA_BASE
        mov     oute[0], al

        ; symbol 1 = low 2 bits of byte 0, top 4 bits of byte 1
        mov     ah, inp[0]
        mov     al, inp[1]              ; ax = byte0:byte1
        shr     ax, 4
        and     al, SIX_BITS
        add     al, ALPHA_BASE
        mov     oute[1], al

        ; symbol 2 = low 4 bits of byte 1, top 2 bits of byte 2
        mov     ah, inp[1]
        mov     al, inp[2]              ; ax = byte1:byte2
        shr     ax, 6
        and     al, SIX_BITS
        add     al, ALPHA_BASE
        mov     oute[2], al

        ; symbol 3 = low 6 bits of byte 2
        mov     al, inp[2]
        and     al, SIX_BITS
        add     al, ALPHA_BASE
        mov     oute[3], al

;------------ write the 4 characters
        mov     ah, 40h
        mov     bx, outh1
        mov     cx, buffsize2
        mov     dx, offset oute
        int     21h
        jc      done                    ; write error
        cmp     ax, cx
        jb      done                    ; fewer bytes written than requested: disk full
        jmp     next_group

syntax_error:
        mov     ah, 09h                 ; print the '$'-terminated usage text
        mov     dx, offset help_content
        int     21h
        jmp     exit                    ; exit never returns, so jump rather than call

done:
;---------close files
        mov     ah, 3eh
        mov     bx, outh1
        int     21h
close_in:
        mov     ah, 3eh
        mov     bx, inh1
        int     21h

exit:
        int     20h                     ; terminate (valid because CS = PSP in a .COM program)

;-----------------------------------------------------------------------
; get_name - copy the next space-delimited word of the command tail
; In:    si = current position in the command tail
;        bx = address just past the last tail character
;        di = destination buffer of NAME_MAX+1 bytes
; Out:   CF clear: word copied and NUL-terminated, si = first byte after it
;        CF set:   no word left, or the word is longer than NAME_MAX
; Clobb: al, cx, si, di, flags
;-----------------------------------------------------------------------
get_name proc near
gn_skip:                                ; step over leading spaces
        cmp     si, bx
        jae     gn_fail                 ; tail exhausted
        mov     al, [si]
        cmp     al, CR
        je      gn_fail
        cmp     al, SPACE
        jne     gn_begin
        inc     si
        jmp     gn_skip
gn_begin:
        mov     cx, NAME_MAX            ; cx = characters that may still be stored
gn_copy:
        mov     [di], al                ; store only real name characters, never the delimiter
        inc     di
        inc     si
        cmp     si, bx
        jae     gn_done
        mov     al, [si]
        cmp     al, CR
        je      gn_done
        cmp     al, SPACE
        je      gn_done
        dec     cx
        jnz     gn_copy
        jmp     gn_fail                 ; one more character would overflow the buffer
gn_done:
        mov     byte ptr [di], 0        ; DOS file functions expect ASCIIZ
        clc
        ret
gn_fail:
        stc
        ret
get_name endp

main_seg ends
end start

---- Here is the decoding program...
.386                                    ; immediate shift counts > 1 (shl al, 2) need a 186+ target
assume cs:main_seg, ds:main_seg
;-----------------------------------------------------------------------
; decode64 - DOS .COM program: decoder for the matching base-64 encoder
; Usage:  code [inputfile] [outputfile]
; Each group of 4 input characters (6-bit value + ALPHA_BASE, codes
; 49..112) becomes 3 output bytes.
; Bytes outside the alphabet (CR, LF, blanks, ...) are skipped, so an
; encoded file that has been split into lines decodes unchanged.
; A final group of only 2 or 3 characters yields 1 or 2 bytes; a single
; left-over character carries no complete byte and is dropped.
;-----------------------------------------------------------------------
main_seg segment use16                  ; a .COM image runs as 16-bit code; say so explicitly after .386
org 100h

CR              equ 13                  ; terminator of the PSP command tail
SPACE           equ 20h
NAME_MAX        equ 30                  ; longest accepted file name (terminator not counted)
ALPHA_BASE      equ 49                  ; character code that represents the 6-bit value 0
MAX_SYMBOL      equ 63                  ; largest 6-bit value
INBUF_SIZE      equ 512                 ; bytes fetched from DOS per read call
buffsize3       equ 4                   ; characters consumed per group
buffsize4       equ 3                   ; bytes produced per full group

;-----------------------------variables-------------------
start:
        jmp     real_start
help_content    db "Syntex is : code [inputfile] [outputfile]",'$'
fin_en          db NAME_MAX dup(0), 0   ; ASCIIZ encoded input file name
fout_de         db NAME_MAX dup(0), 0   ; ASCIIZ decoded output file name

inp2            db 4 dup (?),'$'        ; 6-bit values of the current group
outp            db 3 dup (?),'$'        ; decoded bytes of the current group
inh2            dw ?                    ; input file handle
outh2           dw ?                    ; output file handle
inpos           dw 0                    ; index of the next unread byte in inbuf
inlen           dw 0                    ; number of valid bytes in inbuf
inbuf           db INBUF_SIZE dup(?)    ; read buffer

;----------------------------program starts here------------------------------
real_start:
;------------ fetch both file names from the command tail (PSP:80h = length, PSP:81h.. = text)
        mov     si, 80h
        mov     bl, [si]                ; bl = number of characters in the tail
        xor     bh, bh
        inc     si                      ; si -> first tail character (81h)
        add     bx, si                  ; bx -> just past the last tail character

        mov     di, offset fin_en
        call    get_name
        jc      syntax_error            ; no first argument (or it is too long)
        mov     di, offset fout_de
        call    get_name
        jc      syntax_error            ; no second argument (or it is too long)

;------------ open the encoded input file, read-only
        mov     ax, 3d00h               ; ah = 3dh open, al = 0 read access
        mov     dx, offset fin_en
        int     21h
        jc      exit                    ; nothing is open yet, just leave
        mov     inh2, ax

;------------ create (or truncate) the output file
        mov     ah, 3ch
        xor     cx, cx                  ; normal file attributes
        mov     dx, offset fout_de
        int     21h
        jc      close_in                ; only the input file is open at this point
        mov     outh2, ax

;------------ main loop: invariant - both handles are open
next_group:
        xor     di, di                  ; di = symbols collected for this group
collect:
        call    next_byte
        jc      flush                   ; end of input (or read error): decode what we have
        sub     al, ALPHA_BASE
        cmp     al, MAX_SYMBOL
        ja      collect                 ; unsigned test also rejects codes below ALPHA_BASE
        mov     inp2[di], al
        inc     di
        cmp     di, buffsize3
        jb      collect

flush:
        cmp     di, 2
        jb      done                    ; 0 or 1 symbols hold no complete byte

        mov     bx, di                  ; zero the symbols a short final group lacks,
zero_rest:                              ; so no stale value from the previous group is decoded
        cmp     bx, buffsize3
        jae     decode_group
        mov     byte ptr inp2[bx], 0
        inc     bx
        jmp     zero_rest

decode_group:
        ; byte 0 = symbol 0 (6 bits) + top 2 bits of symbol 1
        mov     al, inp2[0]
        shl     al, 2
        mov     ah, inp2[1]
        shr     ah, 4
        or      al, ah
        mov     outp[0], al

        ; byte 1 = low 4 bits of symbol 1 + top 4 bits of symbol 2
        mov     al, inp2[1]
        shl     al, 4
        mov     ah, inp2[2]
        shr     ah, 2
        or      al, ah
        mov     outp[1], al

        ; byte 2 = low 2 bits of symbol 2 + symbol 3 (6 bits)
        mov     al, inp2[2]
        shl     al, 6
        or      al, inp2[3]
        mov     outp[2], al

;------------ write the decoded bytes: n symbols carry n-1 bytes
        mov     cx, di
        dec     cx
        mov     ah, 40h
        mov     bx, outh2
        mov     dx, offset outp
        int     21h
        jc      done                    ; write error
        cmp     ax, cx
        jb      done                    ; fewer bytes written than requested: disk full
        cmp     di, buffsize3
        je      next_group              ; a full group: more input may follow
        jmp     done                    ; a short group only occurs at end of input

syntax_error:
        mov     ah, 09h                 ; print the '$'-terminated usage text
        mov     dx, offset help_content
        int     21h
        jmp     exit                    ; exit never returns, so jump rather than call

done:
;---------close files
        mov     ah, 3eh
        mov     bx, outh2
        int     21h
close_in:
        mov     ah, 3eh
        mov     bx, inh2
        int     21h

exit:
        int     20h                     ; terminate (valid because CS = PSP in a .COM program)

;-----------------------------------------------------------------------
; next_byte - return the next byte of the input file, refilling inbuf
;             from DOS whenever it runs empty
; In:    inh2 = open input handle
; Out:   CF clear: al = byte
;        CF set:   end of file or read error, al undefined
; Clobb: ax, bx, cx, dx, si, flags (di is not touched)
;-----------------------------------------------------------------------
next_byte proc near
        mov     si, inpos
        cmp     si, inlen
        jb      nb_take                 ; unread bytes remain in the buffer
        mov     ah, 3fh
        mov     bx, inh2
        mov     cx, INBUF_SIZE
        mov     dx, offset inbuf
        int     21h
        jc      nb_ret                  ; CF is already set by DOS
        mov     inlen, ax
        xor     si, si
        test    ax, ax
        jz      nb_eof
nb_take:
        mov     al, inbuf[si]
        inc     si
        mov     inpos, si
        clc
        ret
nb_eof:
        mov     inpos, si               ; si = 0: keep position and length consistent
        stc
nb_ret:
        ret
next_byte endp

;-----------------------------------------------------------------------
; get_name - copy the next space-delimited word of the command tail
; In:    si = current position in the command tail
;        bx = address just past the last tail character
;        di = destination buffer of NAME_MAX+1 bytes
; Out:   CF clear: word copied and NUL-terminated, si = first byte after it
;        CF set:   no word left, or the word is longer than NAME_MAX
; Clobb: al, cx, si, di, flags
;-----------------------------------------------------------------------
get_name proc near
gn_skip:                                ; step over leading spaces
        cmp     si, bx
        jae     gn_fail                 ; tail exhausted
        mov     al, [si]
        cmp     al, CR
        je      gn_fail
        cmp     al, SPACE
        jne     gn_begin
        inc     si
        jmp     gn_skip
gn_begin:
        mov     cx, NAME_MAX            ; cx = characters that may still be stored
gn_copy:
        mov     [di], al                ; store only real name characters, never the delimiter
        inc     di
        inc     si
        cmp     si, bx
        jae     gn_done
        mov     al, [si]
        cmp     al, CR
        je      gn_done
        cmp     al, SPACE
        je      gn_done
        dec     cx
        jnz     gn_copy
        jmp     gn_fail                 ; one more character would overflow the buffer
gn_done:
        mov     byte ptr [di], 0        ; DOS file functions expect ASCIIZ
        clc
        ret
gn_fail:
        stc
        ret
get_name endp

main_seg ends
end start


The code for base 64 actually works. However, I still don't know how to separate the encoded material into orderly lines.
My encoded file looks like this...
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

But I would like to make it look like this..
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.

Do you guys have any suggestions?

Also, I am trying to create encode85 and decode85.

This is similar to base 64; however, I don't really understand how to come up with the encoding and decoding code.
Could you guys help me out?

If you have any links or sources that could help me understand this file to text and text to file conversion(base 85 or base 100 or above) ... please post them.

The characters that i am able to use are from character 33 "!" to 126 "~" from the ASCII table.

I am using the first 64 --- from 33 to "97" characters for my 64 encoding and i would like to use the characters 33 to 118 for my 85 encoding.

The last two encoding options use characters that are one after the other.

However, for my last encoding/decoding option, base 100 or more, the characters after 126 in the ASCII table are on a separate (extended) table.

My problem is that I don't know how to use those characters in my programs, since I've only worked with characters from the first table.
Here is the link: http://www.asciitable.com/

summing things up:

  • I would like to get help with base85 and base100 or above encoding/decoding --any links about this matter would help.
  • If you have any algorithm for base100 or above please if you know something about it or have any link please post them so i can read.

:::::EXTRA INFO::::

I am using TASM32 to compile this assembly source code.
What i've written here are two programs, one that encodes(first source code) and one that decodes text files in base 64.

Once the .exe or .com files are present, we go to the command prompt and type the arguments.

I have encode64 for the first program and decode64 for the second program.

For instance when i want to convert a picture to base64 text this is what i do:
encode64 [source] [destination]
eg. encode64 dani.jpg danili.txt

for decoding we write the same thing
decode64 danili.txt output.jpg and we have the original file(dani.jpg)

Are you talking about this kind of thing?

A number's representation is limited only by the number of digits used by the radix. We use decimal, meaning radix = 10: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9.

Hexidecimal is radix 16: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F (without regard to case).

I don't know how you plan to do base 100. The whole point of base-n encoding is to protect text against "special" numbers. Typically this works because the target radix is sufficiently smaller than the source. For email, the source is ASCII: 7-bits or 128 values. The target in base 64 is, amazingly enough, 64 values. These values can then be represented inside the source set using characters that are known not to have special meaning, 0..9, A..Z, a..z, and two others (10+26+26+2=64). (Which two others varies by implementation.)

Converting between bases is easy enough: all you need is a lookup table for each digit. In decimal the LUT would be "0123456789". '0' is element 0. '1' is index 1. etc.

Hope this helps.

Are you talking about this kind of thing?

A number's representation is limited only by the number of digits used by the radix. We use decimal, meaning radix = 10: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9.

Hexidecimal is radix 16: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F (without regard to case).

I don't know how you plan to do base 100. The whole point of base-n encoding is to protect text against "special" numbers. Typically this works because the target radix is sufficiently smaller than the source. For email, the source is ASCII: 7-bits or 128 values. The target in base 64 is, amazingly enough, 64 values. These values can then be represented inside the source set using characters that are known not to have special meaning, 0..9, A..Z, a..z, and two others (10+26+26+2=64). (Which two others varies by implementation.)

Converting between bases is easy enough: all you need is a lookup table for each digit. In decimal the LUT would be "0123456789". '0' is element 0. '1' is index 1. etc.

Hope this helps.

Thank you for the help. This example helped me understand some things about base 64 decoding. Do you have any algorithm that uses more than 100 characters for encoding?
Also, do you know how to split the output into lines of equal length?

I don't have any algorithm for base64. It wouldn't matter anyway, because the only difference between 64/100/etc. is the radix. Just make it an argument to your function.

The character set used also matters, so I would make it an argument also.

To equally separate lines, just count off how many times you write to file. After 10 or 20 or whatever, output a newline and reset the counter.

Hope this helps.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.