~aritra1911/cylinder

02a4be0445481e58992084af8c6723130f7d3a9d — Aritra Sarkar 2 years ago 0570684 + 99a786f
Merge pull request #1 from aritra1911/parse

Create a struct for SQL parsing
11 files changed, 550 insertions(+), 24 deletions(-)

M .gitignore
M Makefile
A field_test.cpp
A field_type.cpp
A field_type.hpp
A field_type.tpp
M main.cpp
M schema.cpp
M schema.hpp
M sqlparser.cpp
M sqlparser.hpp
M .gitignore => .gitignore +1 -0
@@ 1,2 1,3 @@
*.o
db/
cylinder

M Makefile => Makefile +6 -5
@@ 1,14 1,15 @@
CXXFLAGS = -Wall -Wextra -pedantic -std=c++11
CXXFLAGS = -Wall -Wextra -pedantic -std=c++17
LIBS =
SRC = main.cpp schema.cpp sqlparser.cpp
OBJ = main.o schema.o sqlparser.o
SRC = main.cpp schema.cpp sqlparser.cpp field_type.cpp
OBJ = main.o schema.o sqlparser.o field_type.o

.PHONY: clean

cylinder: $(OBJ)
	$(CXX) $(LIBS) -o $@ $^

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c -o $@ $<

	$(CXX) $(CXXFLAGS) $(DEBUG) -c -o $@ $<

clean:
	$(RM) $(OBJ) cylinder

A field_test.cpp => field_test.cpp +69 -0
@@ 0,0 1,69 @@
#include <iostream>
#include <string>
#include <vector>
#include "field_type.hpp"

int main(void) {
    int n;
    std::cout << "How many fields? ";
    std::cin >> n;

    std::vector<AbstractField*> record;
    std::vector<std::string> field_names;

    std::cout << "\nEnter space separated field names: ";
    for (int i=0; i<n; i++) {
        std::string name;
        std::cin >> name;
        field_names.push_back(name);
    }

    std::cout << "\nNUMBER = 0\nVARCHAR = 1\nEnter space separated field types: ";

    FieldType type;
    for (int i=0; i<n; i++) {
        std::cin >> (int&)type;
        switch (type) {
            case NUMBER:
                record.push_back(new Field<int>(type, field_names[i]));
                break;

            default:
                record.push_back(new Field<std::string>(type, field_names[i]));
        }
    }

    std::cout << "Enter space separated values: ";
    for (int i = 0; i < n; i++) {
        switch (record[i]->type) {
            case NUMBER:
                std::cin >> static_cast<Field<int>*>(record[i])->value;
                break;

            default:
                std::cin >> static_cast<Field<std::string>*>(record[i])->value;
        }
    }

    std::cout << std::endl;

    for (int i=0; i<n; i++) {
        std::cout << record[i]->name << " : ";
        switch (record[i]->type) {
            case NUMBER:
                std::cout << static_cast<Field<int>*>(record[i])->value;
                break;

            default:
                std::cout << static_cast<Field<std::string>*>(record[i])->value;
        }
        std::cout << std::endl;
    }

    for (int i=0; i<n; i++)
        delete record[i];

    record.clear();

    return 0;
}

A field_type.cpp => field_type.cpp +8 -0
@@ 0,0 1,8 @@
#include <string>
#include "field_type.hpp"

/* Records the type tag and the name shared by every concrete Field<T>. */
AbstractField::AbstractField(const FieldType& field_type, const std::string& field_name)
    : type(field_type),
      name(field_name)
{ }

/* ~AbstractField() is declared pure virtual only to make the struct abstract. It still needs a definition, because
 * the destructor of every derived Field<T> calls it implicitly, and that definition has to live outside the struct
 * since a declaration cannot carry both `= 0' and a body. */
AbstractField::~AbstractField() = default;

A field_type.hpp => field_type.hpp +39 -0
@@ 0,0 1,39 @@
#ifndef _FIELD_TYPE_HPP
# define _FIELD_TYPE_HPP

# include <string>

/* Tag describing which concrete Field<T> an AbstractField* really points to. The numeric values are spelled out
 * because they are shown to the user as type codes and written to schema files as a raw byte. */
enum FieldType {
    NUMBER  = 0,  /* Int, TODO: Include Float */
    VARCHAR = 1,  /* std::string, Don't think(TODO) too much right now */
};

/* AbstractField serves as a base for multiple fields of different types, i.e. a collection of AbstractField* in an
 * array can point to multiple Fields of different types. This is how a database record shall be maintained since we
 * don't know prior to user input, what type of Fields we'll be needing and how many of them. FieldType is helpful in
 * determining what type of Field we'll be casting the AbstractField* to.
 *
 * P.S. I'm new to this world of C++ casts and I don't quite know the difference between them. I've experimented with
 * dynamic_cast and static_cast and static_cast seems to be working fine.
 */

/* Type-erased base of every Field<T>: carries only what is common to all fields. */
struct AbstractField {
    FieldType type;    /* Tells which Field<T> this object can be cast down to */
    std::string name;  /* Name of the field (column) */

    /* Stores the given type tag and name. */
    AbstractField(const FieldType& type, const std::string& name);

    /* Pure virtual so that AbstractField itself cannot be instantiated; virtual so that deleting a Field<T> through
     * an AbstractField* runs the derived destructor. */
    virtual ~AbstractField() = 0;
};

/* A field together with the value it holds; T is the C++ type used to store that value. */
template<typename T>
struct Field : public AbstractField {
    T value;  /* The datum stored in this field */

    /* Builds a field from its type tag and name only. */
    Field(const FieldType& type, const std::string& name);

    /* Builds a field from its type tag, name and initial value. */
    Field(const FieldType& type, const std::string& name, const T& value);
};

# include "field_type.tpp"

#endif

A field_type.tpp => field_type.tpp +9 -0
@@ 0,0 1,9 @@
/* Context behind creating this file:
   https://stackoverflow.com/questions/495021/why-can-templates-only-be-implemented-in-the-header-file */

/* Builds a field that has a type tag and a name but no caller-supplied value. `value' is value-initialised (0 for
 * arithmetic T, empty for std::string) so that reading it before assignment is well defined. */
template <class T>
Field<T>::Field(const FieldType& type, const std::string& name) : AbstractField(type, name), value() { }

/* Builds a field from its type tag, its name and a copy of `initial'. */
template <typename T>
Field<T>::Field(const FieldType& field_type, const std::string& field_name, const T& initial)
    : AbstractField(field_type, field_name),
      value(initial)
{ }

M main.cpp => main.cpp +22 -8
@@ 6,19 6,33 @@ const std::string PROMPT = "SQL> ";

/* Entry point of the interactive shell.
 *
 * Prints PROMPT and reads one line per iteration. The loop ends on EOF or when the line is exactly "EXIT" or "QUIT".
 * Any other line is handed to SQL::parse(); unless that returns -1 the parsed statement is run with SQL::execute()
 * against `schema', which starts out as nullptr and is passed by reference so execute() can replace it. Whatever
 * schema is still held when the loop ends is deleted before returning EXIT_SUCCESS.
 *
 * NOTE(review): this span looks like a diff rendering in which lines of an earlier do/while implementation (the
 * ones using `parse(query)' and `temp_schema') are interleaved with the while (1) loop -- confirm against the real
 * main.cpp before relying on the exact statement order shown here.
 */
int main(void) {
    std::string query;
    Schema* schema = nullptr;  /* Currently selected schema, none yet */
    SQL sql;

    std::cout << PROMPT;
    std::getline(std::cin, query);
    while (1) {
        std::cout << PROMPT;
        std::getline(std::cin, query);

    do {
        Schema temp_schema = parse(query);
        /* EXIT on EOF (aka ^D) */
        if (std::cin.eof()) {
            /* Hitting ^D exits abruptly, so here we print nicely, the cause of exit i.e. EOF along with a newline so it
             * doesn't mess up shell prompts and then gracefully exit */
            std::cout << "EOF\n";
            break;
        }

        std::cout << "Schema Name : \"" << temp_schema.get_name() << "\"\n";
        if (query == "EXIT" || query == "QUIT")
            break;

        std::cout << PROMPT;
        std::getline(std::cin, query);
        if (sql.parse(query) == -1)
            continue;  /* Bad query */

        /* TODO: This works on a particular schema, but should we mutate a global schema? */
        sql.execute(schema);
    }

    } while (query != "EXIT" && query != "QUIT");
    if (schema)  /* Make sure we're closing schema files */
        delete schema;

    return EXIT_SUCCESS;
}

M schema.cpp => schema.cpp +120 -2
@@ 1,8 1,126 @@
#include <string>
#include <iostream>
#include <filesystem>
#include "schema.hpp"
#include "field_type.hpp"

Schema::Schema(const std::string& name) : name(name) { }
/* Instantiating a Schema selects it: the schema file is opened for binary reading and writing and stays open for
 * the lifetime of the object.
 *
 * Throws DoesntExistException when the file cannot be opened. Opening with in | out and without trunc never creates
 * a file, so a missing schema is reported by this single open; there is no need to probe with a separate stream
 * first, and a failed open can no longer go unnoticed.
 */
Schema::Schema(const std::string& name) : name(name) {
    file.open( SCHEMA_FILE(this->name), std::ios::in | std::ios::out | std::ios::binary );
    if (!file.is_open())
        throw DoesntExistException();
}

std::string Schema::get_name(void) {
    return this->name;
}

/* Creates an empty schema file called `name' without loading it.
 *
 * Returns 0 on success and -1 (after printing the reason to stderr) when the schema already exists or its file
 * cannot be created.
 */
int Schema::create(const std::string& name) {
    const std::string path = SCHEMA_FILE(name);

    {
        /* A schema exists exactly when its file can be opened for reading */
        std::ifstream probe(path);
        if (probe.is_open()) {
            std::cerr << "Schema already exists!\n";
            return -1;
        }
    }

    /* Opening for output creates the file; the stream closes itself when it goes out of scope */
    std::ofstream created(path);
    if (created.is_open())
        return 0;

    /* Something went wrong, maybe file create permissions? */
    std::cerr << "Couldn't create schema\n";
    return -1;
}

/* Deletes the file backing the schema called `name'; works for any schema, selected or not.
 *
 * Throws DoesntExistException when std::filesystem::remove() reports that nothing was removed.
 */
void Schema::drop( const std::string& name ) {
    const std::filesystem::path schema_path( SCHEMA_FILE( name ) );
    const bool removed = std::filesystem::remove( schema_path );

    if ( removed )
        return;

    throw DoesntExistException();
}

/* Serializes the header of a new table at the end of the schema file.
 *
 * table_name  name written on the header's first line
 * fields      array of `num_fields' pointers; only `type' and `name' of each field are used
 * num_fields  number of entries in `fields'
 *
 * Layout produced:
 *
 * Table_name\n
 * <DATA_TYPE><Field_name> <DATA_TYPE><Field_name> ...num_field times... <DATA_TYPE><Field_name> \n
 *
 * where <DATA_TYPE> is a single byte holding the FieldType value.
 */
void Schema::create_table(const std::string& table_name, AbstractField** fields, const size_t& num_fields) {
    /* An earlier read (e.g. describe() on an empty schema) may have left eofbit/failbit set, which would turn every
     * write below into a silent no-op. Also make sure we append after any existing table instead of overwriting
     * whatever sits at the current position. */
    file.clear();
    file.seekp( 0, std::ios::end );

    /* Write the table name on the first line */
    file << table_name << '\n';

    /* Then one by one the field details, i.e. field types and field names */
    for ( size_t i = 0; i < num_fields; i++ ) {
        /* Store the enumerator's value as one byte. Copying the first byte of the enum's storage instead would
         * depend on the machine's byte order. */
        file.put( static_cast< char >( fields[i]->type ) );
        file << fields[i]->name << ' ';
    }

    /* Terminate the field line (this is what describe() looks for) and flush the header to disk */
    file << std::endl;
}

/* Prints `table_name' followed by one "<field name>\t<NUMBER|VARCHAR>" line per field of the first table header
 * stored in the schema file.
 *
 * TODO: Needs to be implemented actually under a Table class
 * TODO: Check if this is the table the user wants to be described, i.e. check if ( table_name == name ).
 *       Currently, we are testing only 1 table, so it's okay to ignore what the file says the table is called.
 */
void Schema::describe( const std::string& table_name ) {
    /* Drop any error state left by an earlier operation, then get to the beginning 'cause that's where the table
     * header is */
    file.clear();
    file.seekg( 0, std::ios::beg );

    /* The first line contains the table name */
    std::string name;
    file >> name;

    std::cout << table_name << std::endl;

    if ( !file ) {
        /* Empty or unreadable schema: there are no fields to list. Leave the stream usable for later writes. */
        file.clear();
        return;
    }

    file.seekg( 1, std::ios::cur );  /* Skip reading the newline character */

    /* The second line contains the field details. It goes something like this:
     * <DATA_TYPE><Field_name> <DATA_TYPE><Field_name> ...num_field times... <DATA_TYPE><Field_name> \n
     */

    char raw_type;
    /* Read the datatype byte, followed by the name of the field; stop as soon as either read fails instead of
     * polling eof(), which only turns true after a read has already failed */
    while ( file.get( raw_type ) && file >> name ) {
        /* Show them (nicely) TODO: Be nicer */
        std::cout << name << '\t' << ( raw_type ? "VARCHAR" : "NUMBER" ) << std::endl;

        file.seekg( 1, std::ios::cur );  /* Skip reading the space character */

        /* Is this the end of the field details of this table? If so, get out of here */
        if ( file.peek() == '\n' ) break;
    }

    /* Hitting the end of the file sets eofbit/failbit; clear them so the next operation on `file' is not ignored */
    file.clear();
}

/* Closes the schema file if it is still open. */
Schema::~Schema(void) {
    if (!file.is_open())
        return;

    file.close();
}

M schema.hpp => schema.hpp +15 -0
@@ 1,12 1,27 @@
#ifndef _SCHEMA_HPP
# define _SCHEMA_HPP

# include <fstream>
# include "field_type.hpp"

# define DATABASE_DIR "db/"  /* TODO: Create if doesn't exist */
# define SCHEMA_FILE(x) (DATABASE_DIR + x)

/* A selected schema: its name plus the open file that backs it. */
class Schema {
    std::string name;   /* Name the schema was opened with */
    std::fstream file;  /* Backing file, opened by the constructor */

public:
    /* Exceptions */
    class DoesntExistException { };

    /* Selects (opens) the schema called `name' */
    Schema(const std::string& name);
    ~Schema(void);

    std::string get_name(void);

    /* Operations on schema files that need no selected schema */
    static int create(const std::string& name);
    static void drop( const std::string& name );

    /* Operations on the tables of the selected schema */
    void create_table(const std::string& table_name, AbstractField** fields, const size_t& num_fields);
    void describe( const std::string& table_name );
};

#endif  /* _SCHEMA_HPP */

M sqlparser.cpp => sqlparser.cpp +218 -8
@@ 1,6 1,9 @@
#include <iostream>
#include <sstream>
#include <vector>
#include "sqlparser.hpp"
#include "schema.hpp"
#include "field_type.hpp"

std::string head(const std::string& s) {
    size_t pos = s.find(' ');  // TODO: Include all whitespaces


@@ 22,25 25,232 @@ std::string tail(const std::string& s) {
    return "";
}

/* TODO: Return an integer based on success / failure */
Schema parse(const std::string& sql_query) {
    std::string query = sql_query;  // Create a copy so we don't modify the original one
/* Returns the text between the first '(' and the first ')' of `s'.
 *
 * A missing parenthesis is tolerated: without '(' the result starts at the beginning of `s', without ')' it runs to
 * the end of `s', and without either `s' is returned unchanged.
 */
std::string strip_parenthesis(const std::string& s) {
    const std::string::size_type open = s.find('(');
    const std::string::size_type close = s.find(')');

    const bool has_open = open != std::string::npos;
    const bool has_close = close != std::string::npos;

    if (has_open && has_close)
        return s.substr(open + 1, close - open - 1);

    if (has_open)
        return s.substr(open + 1);

    if (has_close)
        return s.substr(0, close);

    return s;
}

/* Cuts `str' at every occurrence of `delim' and returns the pieces in order.
 *
 * Empty pieces between or before delimiters are kept; nothing is produced for an empty `str' or for the empty
 * remainder after a trailing delimiter.
 */
std::vector<std::string> split(const std::string& str, const char& delim) {
    std::vector<std::string> pieces;
    std::string::size_type start = 0;

    while (start < str.size()) {
        const std::string::size_type stop = str.find(delim, start);

        if (stop == std::string::npos) {
            /* No further delimiter: the rest of the string is the last piece */
            pieces.push_back(str.substr(start));
            break;
        }

        pieces.push_back(str.substr(start, stop - start));
        start = stop + 1;
    }

    return pieces;
}

/* Parses one column definition.
 *
 * `col_str' is a space separated list of column_name, data_type and a variable number of constraints.
 * TODO: Currently no constraints have been implemented so it should be ``col_name datatype'' for now
 *
 * The first word becomes the column name. The column is a NUMBER when the second word is exactly "NUMBER" and a
 * VARCHAR in every other case, including when the data type is missing. A definition without any word yields an
 * empty name.
 */
Column::Column(const std::string& col_str) : type(VARCHAR) {
    /* Keep only the non-empty pieces: a definition that follows ", " starts with a space, and split() would
     * otherwise report an empty first word and shift the real name into the data type's slot. */
    std::vector<std::string> words;
    for (const std::string& word : split(col_str, ' '))  /* TODO: There are other whitespace characters */
        if (!word.empty())
            words.push_back(word);

    /* Never index past what was actually supplied */
    if (!words.empty())
        name = words[0];

    if (words.size() > 1 && words[1] == "NUMBER")
        type = NUMBER;
}

/* Parses one query line into this SQL object.
 *
 * The first word selects the statement (CREATE, DROP, SELECT, or anything whose first four characters are "DESC"),
 * the second word -- where one is expected -- the substatement (SCHEMA, or TABLE for CREATE). `name' receives the
 * schema/table name and, for CREATE TABLE, `columns' is rebuilt from the parenthesised, comma separated column
 * list. Keywords are matched case-sensitively.
 *
 * Returns 0 when the query was recognised and -1 (after printing a message to stderr) when it was not.
 *
 * NOTE(review): this span looks like a diff rendering; the lines that build or return a `Schema' object
 * (`new_schema', `Schema("nul")') appear to be leftovers of an earlier free function `Schema parse(...)' interleaved
 * with the current code -- confirm against the real sqlparser.cpp.
 */
int SQL::parse(const std::string& _query) {
    std::string query = _query;  // Create a copy so we don't modify the original one

    /* TODO: SQL is case-insensitive */
    if (head(query) == "CREATE") {
        query = tail(query);  // Chop off head, we won't need that anymore!
        this->statement = CREATE;

        if (head(query) == "SCHEMA") {
            this->substatement = SCHEMA;
            this->name = tail(query);
                /* TODO: SQL statements may end with a semi-colon which is not a part of the name itself. */

        } else if (head(query) == "TABLE") {
            query = tail(query);  // Chop off head, we won't need that anymore!
            this->substatement = TABLE;
            this->name = head(query);

            /* Here tail(query) is now a list of columns and their datatypes separated by comma and enclosed in
             * parenthesis */

            columns.clear();
            for (std::string& col_str : split(strip_parenthesis(tail(query)), ','))
                columns.push_back(Column(col_str));

            /* And that's boyz & gals is how you parse a CREATE TABLE query */

        } else {
            std::cerr << "What's " << head(query) << "? - rest of the line ignored!\n";
            return -1;
        }
    } else if (head(query) == "DROP") {
        query = tail(query);  // Chop off head, we won't need that anymore!
        this->statement = DROP;

        if (head(query) == "SCHEMA") {
            Schema new_schema(tail(query));
            std::cout << "Schema created.\n";
            return new_schema;
            this->substatement = SCHEMA;
            this->name = tail(query);

        } else {
            std::cerr << "What's " << head(query) << "? - rest of the line ignored!\n";
            return Schema("nul");  /* Failure */
            return -1;
        }
    } else if (head(query) == "SELECT") {
        query = tail(query);  // Chop off head, we won't need that anymore!
        this->statement = SELECT;

        if (head(query) == "SCHEMA") {
            this->substatement = SCHEMA;
            this->name = tail(query);

        } else {
            std::cerr << "What's " << head(query) << "? - rest of the line ignored!\n";
            return -1;
        }
    } else if (head(query).substr(0, 4) == "DESC") {
        /* Accepts DESC, DESCRIBE and any other word starting with those four letters */
        this->name = tail(query);
        this->statement = DESCRIBE;

    } else {
        std::cerr << "Couldn't parse query\n";
        return Schema("nul");  /* Failure */
        return -1;
    }

    return 0;
}

/* Runs the statement most recently stored by a successful parse().
 *
 * `schema' is the currently selected schema, or nullptr when none is selected. It is taken by reference because
 * SELECT SCHEMA replaces it: a previously selected, different schema is deleted and a new one is allocated, which
 * the caller then owns. Statements that need a selected schema (CREATE TABLE, DESCRIBE) print "No Schema selected!"
 * and do nothing when `schema' is nullptr.
 *
 * Outcomes are reported on stdout, failures on stderr; nothing is returned.
 */
void SQL::execute(Schema*& schema) {  /* TODO: Should `schema' be mutable? */
    switch (statement) {
        case CREATE:
            switch (substatement) {
                case SCHEMA:
                    if (Schema::create(name) == -1)
                        std::cerr << "Couldn't create schema!\n";
                    else
                        std::cout << "Schema created.\n";
                    break;

                case TABLE: {
                    if (!schema) {
                        std::cerr << "No Schema selected!\n";
                        break;
                    }

                    /* Translate the parser-only `Column's into the generic fields that schema->create_table()
                     * expects. A vector owns the pointer array, so the array itself is released automatically; a
                     * hand-allocated array would additionally need a delete[] that is easy to forget.
                     *
                     * * Why Field<int>?
                     * > Now that's a hack since here we don't really care about what type of data the field is
                     *   holding since it'll be holding none. I could've just used objects of AbstractField, but
                     *   it's abstract.
                     */
                    std::vector<AbstractField*> fields;
                    fields.reserve(columns.size());
                    for (const Column& col : columns)
                        fields.push_back(new Field<int>(col.type, col.name, 0));

                    schema->create_table(name, fields.data(), fields.size());

                    /* Clean up */
                    for (AbstractField* field : fields)
                        delete field;
                    columns.clear();

                }   break;

                default:
                    /* If the parse() works correctly, and provided no break statements were missed above, this case
                     * should never be reached. */
                    std::cerr << "You shouldn't be seeing this!\n";
            }
            break;

        case DROP:
            switch ( substatement ) {
                case SCHEMA:
                    try {
                        Schema::drop( name );

                    } catch ( const Schema::DoesntExistException& ) {
                        std::cerr << "Schema doesn't exist!\n";
                        break;
                    }

                    std::cout << "Schema dropped!\n";
                    break;

                default:
                    /* If the parse() works correctly, and provided no break statements were missed above, this case
                     * should never be reached. */
                    std::cerr << "You shouldn't be seeing this!\n";
            }
            break;

        case SELECT:
            switch (substatement) {
                case SCHEMA:
                    /* If we've selected another schema that's not pointed by `schema' */
                    if (schema && schema->get_name() != name) {
                        delete schema;  /* Schema's destructor is called and it safely closes the file */
                        schema = nullptr;
                    }

                    if (!schema) {
                        try {
                            schema = new Schema(name);

                        } catch (const Schema::DoesntExistException&) {
                            std::cerr << "Schema doesn't exist!\n";
                            break;
                        }
                    }

                    std::cout << "Schema selected.\n";
                    break;

                default:
                    /* If the parse() works correctly, and provided no break statements were missed above, this case
                     * should never be reached. */
                    std::cerr << "You shouldn't be seeing this!\n";
            }
            break;

        case DESCRIBE:
            /* DESCRIBE reads from the selected schema's file, so there must be one */
            if (!schema) {
                std::cerr << "No Schema selected!\n";
                break;
            }

            schema->describe(this->name);
            break;

        default:
            /* If the parse() works correctly, and provided no break statements were missed above, this case should
             * never be reached. */
            std::cerr << "You shouldn't be seeing this!\n";
    }
}

M sqlparser.hpp => sqlparser.hpp +43 -1
@@ 1,13 1,55 @@
#ifndef _SQLPARSER_HPP
# define _SQLPARSER_HPP

# include <string>
# include <vector>
# include "schema.hpp"

/* Keywords the parser distinguishes. One enum serves both as the leading statement keyword and as the keyword that
 * follows it somewhere inside the query. */
enum Clause {
    /* A SQL query begins with one of these */
    CREATE = 0,
    DROP,
    ALTER,
    DESCRIBE,
    SELECT,
    INSERT,
    UPDATE,
    DELETE,

    /* These are mentioned somewhere in the middle of a SQL query */
    SCHEMA,
    TABLE,
    SET,
    INTO,
    VALUES,
    FROM,
    WHERE,
};

/* One column of a CREATE TABLE statement as seen by the parser. */
struct Column {
    std::string name;  /* Column name */
    FieldType type;    /* Data type of the column */
    /* TODO: Add constraints */

    /* Builds the column from its textual definition */
    Column(const std::string& col_str);
};

/* A parsed SQL query: parse() fills in the members, execute() acts on them. */
struct SQL {
    Clause statement;             /* Keyword the query begins with */
    Clause substatement;          /* Keyword following the statement, where there is one */
    std::string name;             /* Name of the schema or table the query refers to */
    std::vector<Column> columns;  /* Column definitions of a CREATE TABLE query */

    /* Returns 0 on success and -1 for a query that could not be parsed */
    int parse(const std::string& query);

    /* Runs the parsed query; may replace the schema `schema' points to */
    void execute(Schema*& schema);
};

/* After splitting the given string using the first encountered whitespace as a delimiter, the head() and tail()
 * functions do as their names imply */
std::string head(const std::string&);
std::string tail(const std::string&);

Schema parse(const std::string&);
std::string strip_parenthesis(const std::string&);
/* Splits a string at every occurrence of the given delimiter character. The declaration has to carry both
 * parameters of the definition in sqlparser.cpp; a one-parameter declaration names an overload that is defined
 * nowhere. */
std::vector<std::string> split(const std::string&, const char&);

#endif  /* _SQLPARSER_HPP */