Skip to content

CPP: Implement list #1205

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft

CPP: Implement list #1205

wants to merge 1 commit into from

Conversation

Smit-create
Copy link
Collaborator

@Smit-create Smit-create commented Oct 17, 2022

@certik @czgdp1807

  1. The current implementation uses list_api, which is the same as in C. Should we also add an option to use STL containers such as std::vector in CPP, which might give cleaner code from a developer's point of view? Benchmarks comparing the current list_api with std::vector are shown below.
  2. Another question: should we keep using Kokkos?
  3. Once we have settled the two questions above, can we also start testing the CPP backend in the integration tests?

Benchmarks using current list_api and std::vector:

  1. Using list_api:
Generated Code

#include <cassert>
#include <cmath>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
// #include <Kokkos_Core.hpp>
#include <lfortran_intrinsics.h>

// template <typename T>
// Kokkos::View<T*> from_std_vector(const std::vector<T> &v)
// {
//     Kokkos::View<T*> r("r", v.size());
//     for (size_t i=0; i < v.size(); i++) {
//         r(i) = v[i];
//     }
//     return r;
// }


// Describes one array dimension as a (lower bound, length) pair.
// NOTE(review): not referenced by the rest of this listing; presumably
// emitted unconditionally by the code generator — confirm.
struct dimension_descriptor
{
    int32_t lower_bound;  // first valid index of the dimension
    int32_t length;       // number of elements along the dimension
};

// Growable array of int32_t values manipulated by the list_*_i32 helpers.
struct list_i32 {
    // Number of int32_t slots currently allocated in `data`.
    int32_t capacity;
    // Number of slots in use; also the index where the next append is stored.
    int32_t current_end_point;
    // Buffer obtained from malloc/realloc by the list_*_i32 helpers.
    int32_t* data;
};

// Forward declarations of the list_i32 helper routines. Their definitions
// appear after main() further down in this file.

// Element equality test used by list_find_item_i32.
inline bool compare_i32(int32_t a, int32_t b);
// Allocates storage for `capacity` elements and marks the list empty.
inline void list_init_i32(struct list_i32* x, int32_t capacity);
// Copies src's fields into dest, giving dest its own freshly allocated buffer.
inline void list_deepcopy_i32(struct list_i32* src, struct list_i32* dest);
// Grows the buffer when every allocated slot is in use.
inline void resize_if_needed_i32(struct list_i32* x);
// Stores `element` after the last used slot.
inline void list_append_i32(struct list_i32* x, int32_t element);
// Stores `element` at index `pos`, shifting later elements up by one.
inline void list_insert_i32(struct list_i32* x, int pos, int32_t element);
// Returns the index of the first match, or -1 when there is none.
inline int list_find_item_i32(struct list_i32* x, int32_t element);
// Removes the first occurrence of `element`.
inline void list_remove_i32(struct list_i32* x, int32_t element);
// Releases the buffer and replaces it with a fresh, empty one.
inline void list_clear_i32(struct list_i32* x);
// Returns a newly malloc'ed list holding left's elements followed by right's.
inline struct list_i32* list_concat_i32(struct list_i32* left, struct list_i32* right);

// Forward declarations
// Top-level program body; invoked from main2().
void _lpython_main_program();
// Benchmark kernel: builds a list and returns the sum of its first n elements.
int64_t test_list(int32_t n);
// Anonymous namespace; empty in this listing.
namespace {
}

// Implementations
// Runs the list benchmark with a fixed element count and prints the sum,
// followed by a newline and a stream flush.
void _lpython_main_program()
{
    const int64_t total = test_list(100000002);
    std::cout << total << std::endl;
}

// Builds the list [0], appends 0 .. n-1 to it, and returns the sum of the
// first n elements as a 64-bit integer.
//
// Both heap buffers created here (the one-element literal and the working
// copy) are released before returning; previously they were leaked.
int64_t test_list(int32_t n)
{
    int64_t _lpython_return_variable;
    struct list_i32 a;
    int32_t i;
    int64_t s;

    // Materialise the list literal [0] ...
    struct list_i32 constname0;
    list_init_i32(&constname0, 1);
    constname0.data[0] = 0;
    constname0.current_end_point = 1;
    // ... and give `a` its own independent copy of it.
    list_deepcopy_i32(&constname0, &a);

    for (i=0; i<=n - 1; i++) {
        list_append_i32(&a, i);
    }
    s = 0;
    for (i=0; i<=n - 1; i++) {
        s = s + a.data[i];
    }
    _lpython_return_variable = s;

    // Neither list escapes this function, so free their buffers here.
    free(a.data);
    free(constname0.data);
    return _lpython_return_variable;
}

// File-local helpers (internal linkage via the anonymous namespace).
namespace {

// Thin wrapper that runs the translated program body; called from main().
void main2() {
    _lpython_main_program();
}

}
// Process entry point: runs the program via main2() and exits with status 0.
// argc/argv are only needed by the Kokkos initialisation, which is currently
// commented out.
int main(int argc, char* argv[])
{
    // Kokkos::initialize(argc, argv);
    main2();
    // Kokkos::finalize();
    return 0;
}

// Equality predicate for two int32_t list elements.
// Returns true when `a` and `b` hold the same value.
bool compare_i32(int32_t a, int32_t b) {
    return !(a != b);
}

// Initialises `x` as an empty list with room for `capacity` elements.
// The buffer comes from malloc; the allocation result is not checked.
void list_init_i32(struct list_i32* x, int32_t capacity) {
    int32_t* const storage = static_cast<int32_t*>(malloc(capacity * sizeof(int32_t)));
    x->data = storage;
    x->current_end_point = 0;  // no elements stored yet
    x->capacity = capacity;
}

// Makes `dest` an independent copy of `src`: same capacity and element count,
// with a newly malloc'ed buffer holding a byte copy of src's whole buffer
// (all `capacity` slots, not only the used ones).
// Whatever `dest->data` pointed to before is overwritten without being freed.
void list_deepcopy_i32(struct list_i32* src, struct list_i32* dest) {
    const size_t byte_count = src->capacity * sizeof(int32_t);
    dest->capacity = src->capacity;
    dest->current_end_point = src->current_end_point;
    dest->data = static_cast<int32_t*>(malloc(byte_count));
    memcpy(dest->data, src->data, byte_count);
}

// Ensures there is at least one free slot after the last element.
// When the list is full, the capacity becomes 2 * capacity + 1 and the
// buffer is reallocated to that size; otherwise nothing happens.
void resize_if_needed_i32(struct list_i32* x) {
    if (x->capacity != x->current_end_point) {
        return;  // still room for another element
    }
    x->capacity = 2 * x->capacity + 1;
    x->data = static_cast<int32_t*>(realloc(x->data, x->capacity * sizeof(int32_t)));
}

// Appends `element` to the end of `x`, growing the buffer first if it is full.
void list_append_i32(struct list_i32* x, int32_t element) {
    resize_if_needed_i32(x);
    const int32_t slot = x->current_end_point;
    x->data[slot] = element;
    x->current_end_point = slot + 1;
}

// Inserts `element` before index `pos`, shifting the elements at `pos` and
// beyond up by one slot.
//
// Out-of-range positions are normalised the way Python's list.insert does:
// a negative `pos` counts from the end, and the result is clamped to
// [0, current_end_point]. Previously such positions indexed outside the
// buffer.
//
// The shift runs from the tail towards `pos`, so it only ever reads slots
// that already hold elements (the earlier forward shift also read the
// uninitialised slot just past the last element).
void list_insert_i32(struct list_i32* x, int pos, int32_t element) {
    resize_if_needed_i32(x);  // guarantees data[current_end_point] is writable

    const int32_t count = x->current_end_point;
    if (pos < 0) {
        pos += count;
        if (pos < 0) {
            pos = 0;
        }
    } else if (pos > count) {
        pos = count;
    }

    for (int32_t i = count; i > pos; --i) {
        x->data[i] = x->data[i - 1];
    }

    x->data[pos] = element;
    x->current_end_point += 1;
}

// Linear search for `element` among the stored elements of `x`.
// Returns the index of the first match, or -1 if no element compares equal.
int list_find_item_i32(struct list_i32* x, int32_t element) {
    for (int idx = 0; idx < x->current_end_point; ++idx) {
        if (compare_i32(x->data[idx], element)) {
            return idx;
        }
    }
    return -1;
}

// Removes the first occurrence of `element` from `x`, shifting the elements
// after it down by one slot.
//
// If `element` is not present the list is left untouched. Previously the -1
// returned by list_find_item_i32 was used as an index, which wrote to
// data[-1] and silently dropped the last element.
// NOTE(review): Python's list.remove raises ValueError in this situation;
// decide whether the generated code should report an error instead.
void list_remove_i32(struct list_i32* x, int32_t element) {
    const int el_pos = list_find_item_i32(x, element);
    if (el_pos < 0) {
        return;
    }

    // Stop one short of the end so that data[current_end_point], which may
    // lie beyond the allocated buffer when the list is full, is never read.
    for (int i = el_pos; i + 1 < x->current_end_point; ++i) {
        x->data[i] = x->data[i + 1];
    }
    x->current_end_point -= 1;
}

// Empties `x`: frees the current buffer and replaces it with a fresh
// four-slot allocation, resetting the element count to zero.
void list_clear_i32(struct list_i32* x) {
    free(x->data);
    x->current_end_point = 0;
    x->capacity = 4;
    x->data = static_cast<int32_t*>(malloc(x->capacity * sizeof(int32_t)));
}

// Returns a newly malloc'ed list containing the elements of `left` followed
// by the elements of `right`. Neither input is modified. Both the returned
// struct and its buffer are heap allocations made here.
struct list_i32* list_concat_i32(struct list_i32* left, struct list_i32* right) {
    const int32_t left_count = left->current_end_point;
    const int32_t right_count = right->current_end_point;

    struct list_i32* joined = static_cast<struct list_i32*>(malloc(sizeof(struct list_i32)));
    list_init_i32(joined, left_count + right_count);

    // Copy left's elements first, then right's directly after them.
    memcpy(joined->data, left->data, left_count * sizeof(int32_t));
    memcpy(joined->data + left_count, right->data, right_count * sizeof(int32_t));

    joined->current_end_point = left_count + right_count;
    return joined;
}

Results:

% CPATH=$PWD/src/libasr/runtime g++ c.cpp -o c.out 
% time ./c.out 
5000000050000000
./c.out  0.61s user 0.05s system 39% cpu 1.661 total
% time ./c.out
5000000050000000
./c.out  0.62s user 0.06s system 99% cpu 0.677 total
  2. Using std::vector
Equivalent Code

#include <vector>
#include <iostream>

// std::vector counterpart of the list_api benchmark: starts from {0},
// appends 0 .. n-1, then returns the sum of the first n elements.
int64_t test_list(int32_t n) {
    std::vector<int32_t> values{0};
    int32_t next = 0;
    while (next < n) {
        values.push_back(next);
        ++next;
    }

    int64_t total = 0;
    for (int32_t idx = 0; idx < n; ++idx) {
        total = total + values[idx];
    }
    return total;
}

// Runs the std::vector benchmark with the same element count as the
// list_api version and prints the resulting sum.
int main() {
    const int64_t total = test_list(100000002);
    std::cout << total << std::endl;
    return 0;
}

Results:

% g++ -std=c++11 b.cpp -o b.out
% time ./b.out 
5000000050000000
./b.out  2.94s user 0.12s system 83% cpu 3.687 total
% time ./b.out
5000000050000000
./b.out  2.93s user 0.10s system 99% cpu 3.039 total
% time ./b.out
5000000050000000
./b.out  2.93s user 0.10s system 99% cpu 3.030 total

@Smit-create Smit-create added the cpp C++ related changes label Oct 17, 2022
@Smit-create Smit-create marked this pull request as draft October 17, 2022 10:40
@Smit-create Smit-create added the question Further information is requested label Oct 30, 2022
@czgdp1807
Copy link
Collaborator

Seems like list_api is faster?

@Smit-create
Copy link
Collaborator Author

Seems like list_api is faster?

Yes

@czgdp1807
Copy link
Collaborator

Try with -O3 optimisation using clang for both of the cases you have considered.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
cpp C++ related changes question Further information is requested
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants