A simple interpreter called 'minibas'

tux4life 0 Tallied Votes 813 Views Share

I didn't know what to do and I thought: why not try writing an interpreter, I've never done this before and it seemed quite challenging, so I started writing code....

The code which you can find below is only a simple base upon which you can start implementing your own simple programming language.
(Yet to mention: the code doesn't feature an expression parser or variables, so you'll have to implement that yourself)

As this is my first time doing a programming challenge like this one, there are of course likely to be several possible improvements to my code; I'd be glad if you could point them out.

For simplicity (and testing) I've only implemented two functions :P: PRINT and END (they're both written in uppercase, and the interpreter will report a syntax error if you don't write them like that)

Description of the two built-in functions:

  • PRINT: Prints some text (or a number) to the screen. Only one token at a time is allowed; the supported tokens are STRING and NUMBER, where STRING is anything you can type on your keyboard (as long as it's in between two quotes) and NUMBER can be any valid integer (floating point numbers aren't supported). The output is always ended with a newline character.
  • END: This function signals the end of the program and shuts down the interpreter.
/*****************
@file: minibas.cpp
*****************/

#include <cctype>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include "minibas.h"
using namespace std;

/* All global variables needed for the interpreter */
// Type of the token most recently produced by get_token().
tok_t tok_type;
// Capacities (in chars, including the terminator) of the input-line and token buffers.
const int MAX_INP = 256, MAX_TOK = 80;

// The line currently being interpreted; filled by main() and scanned by get_token().
char input[MAX_INP] = {0};
// Text of the token most recently produced by get_token().
char token[MAX_TOK] = {0};
// Set by flush_input(); tells get_token() to restart scanning at the beginning of input.
bool new_inp;

/* MINIBAS Source Code */

int main()
{
    for(;;)
    {
        flush_input();
        
        printf("> ");
        cin.getline(input, 256);
        printf("\n");
        
        parse();
    }
    
    return 0;
}

void parse()
{
    enum stat_t {PRINT = 0, END, NOSTAT} stat_type; // statements (have to be in exact the same order as the keywords below)
    const char *keywords[] = {
        "PRINT",
        "END",
        "NOSTAT", // always one place before the last element
        ""      // null-terminate the list
    };
    const char **ptr = keywords;
    
    get_token();
    if(tok_type == NOTOK) return; // No token present
    
    for(int i = 0; *ptr[i]; ++i) { // Start converting to integer token
        stat_type = (stat_t) i;
        if( !strcmp(token, ptr[i] ) ) break;
    }
    
    // We assume every line starts with a statement
    switch(stat_type)
    {
        case END:
            do_end();
            break;
        case PRINT:
            do_print();
            break;
        default:
            serror(0, string(token));
    }
}

void get_token()
{
    char *tok = token;
    static char *iptr = input;
    tok_type = NOTOK;
    
    if( new_inp ) { // when interpreting a new line, restore the pointer
        iptr = input;
        new_inp = false;
    }
    
    if( !*iptr ) { // no tokens left on this line
        *tok = '\0';
        return;
    }
    
    while( *iptr && isspace(*iptr) ) iptr++; // skip over spaces
    
    if( *iptr && isdelim(*iptr) ) { // found a delimiter
        tok_type = DELIMITER;
        *tok++ = *iptr++;
    }
    
    if( tok_type == NOTOK && isdigit( *iptr ) ) { // found a number (integer only)
        tok_type = NUMBER;
        while( *iptr && isdigit( *iptr ) && !isdelim( *iptr ) ) {
            *tok++ = *iptr++;
        }
    }
    
    if( tok_type == NOTOK && *iptr == '\"' ) { // string token, get the whole string
        tok_type = STRING;
        *tok++ = *iptr++;
        while( *iptr && ( *tok++ = *iptr++ ) != '\"' );
    }
    
    if( tok_type == NOTOK && !isdelim( *iptr ) ) {
        tok_type = STATEMENT;
        while( *iptr && !isdelim( *iptr ) ) { // token is a statement (or rubbish)
            *tok++ = *iptr++;
        }
    }
    
    *tok = '\0'; // add null-terminator to token
}

/*
 * Print an error message to standard output.
 *
 * errnum  selects the message prefix:
 *           0  "Syntax error: "
 *           1  "Wrong argument: "
 *           2  "Missing argument."
 *           3  "No ending quote found.\nInvalid string: "
 *         any other value selects an empty prefix instead of reading
 *         outside the message table.
 * custom  text appended directly after the prefix.
 *
 * The output is always terminated with a newline.
 */
void serror(int errnum, const std::string &custom)
{
    static const char *const messages[] = {
        "Syntax error: ",
        "Wrong argument: ",
        "Missing argument.",
        "No ending quote found.\nInvalid string: "
    };
    const int count = static_cast<int>( sizeof messages / sizeof messages[0] );

    const char *prefix = ( errnum >= 0 && errnum < count ) ? messages[errnum] : "";

    std::cout << prefix << custom << "\n";
}

/*
 * Prepare for a fresh input line: mark the input as new (so get_token()
 * restarts at the beginning of the buffer) and empty the buffer itself.
 */
void flush_input()
{
    new_inp = true;
    *input = '\0';
}

/*
 * Tell whether `c` is a delimiter.
 *
 * Returns 1 for a comma, a space and the string terminator '\0',
 * and 0 for every other character.
 */
int isdelim(char c)
{
    switch( c )
    {
        case ',':
        case ' ':
        case '\0': // get_token() relies on the terminator counting as a delimiter
            return 1;
        default:
            return 0;
    }
}

/***************
@file: minibas.h
***************/

#ifndef MINIBAS_H
#define MINIBAS_H

#include <string> // serror() takes a std::string, so the header must not rely on its includer

/* token type */
enum tok_t {NOTOK, STATEMENT, STRING, DELIMITER, NUMBER};

/* All the interpreter's functions */

/* Interpret the current input line: read the first token and run the matching statement. */
void parse();

/* Read the next token of the input line into `token` and set `tok_type`. */
void get_token();

/* Empty the input buffer and make get_token() restart at its beginning. */
void flush_input();

/* Print the message selected by `errnum`, followed by `custom` and a newline. */
void serror(int errnum, const std::string &custom);

/* Return 1 if `c` is a delimiter character, 0 otherwise. */
int isdelim(char c);

/* Handler for the END statement. */
void do_end();

/* Handler for the PRINT statement. */
void do_print();

#endif /* MINIBAS_H */

/***********************
@file: keyword_funcs.cpp
***********************/

/* All functions needed to handle the keywords of minibas */

#include <iostream>
#include <cstdio>
#include <string>
#include "minibas.h"
using namespace std;

// Interpreter state defined in minibas.cpp: the type and the text of the
// token most recently read by get_token().
extern tok_t tok_type;
extern char token[];

/*
 * Handler for the END statement.
 *
 * END takes no arguments: if nothing follows it on the line, a message is
 * printed and the program exits with status 0. If anything follows it, a
 * syntax error is reported and the interpreter keeps running.
 */
void do_end()
{
    get_token();

    if( tok_type == NOTOK ) {
        cout << "Stopped execution.\n";
        exit(0);
    }

    serror(0, "END doesn't require any parameters.");
}

/*
 * Handler for the PRINT statement.
 *
 * Expects exactly one argument on the rest of the line:
 *   NUMBER  printed as typed
 *   STRING  printed without its surrounding quotes
 * followed by a newline.
 *
 * Errors (nothing is printed in these cases):
 *   - no argument                       -> "Missing argument."
 *   - string without a closing quote    -> "No ending quote found..."
 *   - argument of any other token type  -> syntax error
 *   - anything after the first argument -> syntax error (previously such
 *     trailing tokens were silently ignored, unlike in do_end())
 */
void do_print()
{
    get_token();

    if( tok_type == NOTOK ) { // no token present, thus no argument for PRINT
        serror(2, "");
        return;
    }

    // The next get_token() call overwrites `token`, so keep a copy of the
    // text to print before checking for trailing tokens.
    std::string text;

    if( tok_type == NUMBER ) {
        text = token;
    }
    else if( tok_type == STRING ) {
        const char *body = token + 1; // jump over beginning quote
        const char *closing = strchr(body, '\"');

        if( !closing ) {
            serror(3, std::string(token)); // no ending quote found, thus string is invalid
            return;
        }

        text.assign(body, closing);
    }
    else {
        serror(0, "PRINT can only handle strings and numbers.");
        return;
    }

    get_token();
    if( tok_type != NOTOK ) { // only one token at a time is allowed
        serror(0, "PRINT accepts only one argument.");
        return;
    }

    std::cout << text << '\n';
}

/**********
@sample run
**********/
/*
> lmekf

Syntax error: lmekf
> PRINT

Missing argument.
> PRINT "Hello World!!"

Hello World!!
> PRINT 35

35
> PRINT     error

Syntax error: PRINT can only handle strings and numbers.
> END

Stopped execution.
*/
tux4life 2,072 Postaholic

It might be better to implement a separate function to check whether a string is valid or not.

tux4life 2,072 Postaholic

Instead of checking for [B]NOTOK[/B] in each branch of the [B]get_token()[/B] function, it would have been better to chain the branches with [B]else if[/B] .

tux4life 2,072 Postaholic

As it now stands, I really have to admit that this code sucks.

Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.