~samhsmith/AstraOS

c67de80c087b721797edede62a2b62ed63eb822b — Sam H Smith 2 years ago a223aff
Partial performance fix for IPFCs. Optimizing the IPFC return is best done when we move away from round-robin scheduling. Also fixed a memory bug, only present in debug builds, in super_cool_square.
7 files changed, 81 insertions(+), 15 deletions(-)

M square_src/elf.c
M src/kernel.c
M src/log.c
M src/process.c
M src/process_run.c
M src/syscall.c
M src/tempuser.c
M square_src/elf.c => square_src/elf.c +3 -1
@@ 92,7 92,9 @@ while(1)
    else if(is_running_as_twa)
    {
        f64 sec_before_call = AOS_H_time_get_seconds();
        surface_count = AOS_IPFC_call(twa_session_id, 2, &twa_window_handle, surfaces);
        u64 scratch[1024/8];
        scratch[0] = twa_window_handle;
        surface_count = AOS_IPFC_call(twa_session_id, 2, scratch, surfaces);
        f64 sec_after_call = AOS_H_time_get_seconds();
        AOS_H_printf("time to get surfaces via ipfc : %5.5lf ms\n", (sec_after_call - sec_before_call) * 1000.0);
    }

M src/kernel.c => src/kernel.c +2 -2
@@ 576,7 576,7 @@ u64 m_trap(
                            KernelLogEntry entry = KERNEL_LOG[(log_index - i) % KERNEL_LOG_SIZE];
                            if(entry.is_kernel)
                            {
                                printf("%3.3llu) H:%llu - T: %llu | %s:%llu - %s\n",
                                printf("%4.4llu) H:%llu - T: %llu | %s:%llu - %s\n",
                                       log_entry_counter++,
                                       entry.hart,
                                       entry.time,


@@ 586,7 586,7 @@ u64 m_trap(
                            }
                            else
                            {
                                printf("%3.3llu) H:%llu - T: %llu - PID:%llu - TID:%llu | %s:%llu - %s\n",
                                printf("%4.4llu) H:%llu - T: %llu - PID:%llu - TID:%llu | %s:%llu - %s\n",
                                       log_entry_counter++,
                                       entry.hart,
                                       entry.time,

M src/log.c => src/log.c +1 -1
@@ 11,7 11,7 @@ typedef struct
    u8 is_kernel;
} KernelLogEntry;

#define KERNEL_LOG_SIZE 800
#define KERNEL_LOG_SIZE 8000

KernelLogEntry KERNEL_LOG[KERNEL_LOG_SIZE];
atomic_s64 KERNEL_LOG_INDEX;

M src/process.c => src/process.c +15 -4
@@ 30,6 30,7 @@ typedef struct
    Kallocation stack_alloc;
    u64 program_counter;
    u64 process_pid;
    u64 thread_runtime_index;
    u8 is_initialized;
    u8 is_running;
    u8 should_be_destroyed;


@@ 40,6 41,7 @@ typedef struct
    // 2 is IPFC thread awaiting stack
    // 3 is IPFC running
    u64 IPFC_other_pid;
    u64 IPFC_caller_runtime_index;
    u32 IPFC_other_tid;
    u16 IPFC_function_index;
    u16 IPFC_handler_index;


@@ 229,9 231,12 @@ Kallocation THREAD_GROUP_ARRAY_ALLOC;
u64 THREAD_GROUP_ARRAY_LEN;
// uses the same lock as above

// have thread group be zero if you don't have special intentions
// starting_t_value should be zero for non ipfc threads
u32 process_thread_create(u64 pid, u32 thread_group, u64 hart, u32 starting_t_value)
// you must have a write lock on KERNEL_PROCESS_ARRAY_RWLOCK when calling
// thread create.
// have thread_group be zero if you don't have special intentions
// out_runtime_index should either be zero or a pointer to a u64
// it gives you the runtime index of the new thread
u32 process_thread_create(u64 pid, u32 thread_group, u64 hart, u64* out_runtime_index)
{
    assert(pid < KERNEL_PROCESS_ARRAY_LEN, "pid is within range");
    assert(KERNEL_PROCESS_ARRAY[pid]->mmu_table != 0, "pid refers to a valid process");


@@ 322,6 327,12 @@ u32 process_thread_create(u64 pid, u32 thread_group, u64 hart, u32 starting_t_va
 
        runtime = THREAD_RUNTIME_ARRAY_LEN++;
    }

    if(out_runtime_index)
    {
        *out_runtime_index = runtime;
    }
    KERNEL_PROCESS_ARRAY[pid]->threads[tid].thread_runtime_index = runtime;
 
    ThreadRuntime* r = ((ThreadRuntime*)THREAD_RUNTIME_ARRAY_ALLOC.memory) + runtime;
    spinlock_create(&r->lock);


@@ 331,7 342,7 @@ u32 process_thread_create(u64 pid, u32 thread_group, u64 hart, u32 starting_t_va
    r->is_initialized = 1;
    r->owning_hart.value = hart;
    r->allowed_width = KERNEL_HART_COUNT.value; // less temp
    r->t_value = starting_t_value;
    r->t_value = 0;

    // now we must identify and log the thread group
    {

M src/process_run.c => src/process_run.c +8 -1
@@ 73,13 73,14 @@ u64 current_thread_runtimes[KERNEL_MAX_HART_COUNT];
u64 last_mtimes[KERNEL_MAX_HART_COUNT];

/*
 * Make sure you have a READ lock on THREAD_RUNTIME_ARRAY_LOCK
 * Make sure you have a READ lock on KERNEL_PROCESS_ARRAY_RWLOCK
 * when calling kernel_choose_new_thread
 */

struct xoshiro256ss_state kernel_choose_new_thread_rando_state[KERNEL_MAX_HART_COUNT];
void kernel_choose_new_thread(u64 new_mtime, u64 hart)
{
    rwlock_acquire_read(&THREAD_RUNTIME_ARRAY_LOCK);
    ThreadRuntime* runtime_array = THREAD_RUNTIME_ARRAY_ALLOC.memory;

    if(kernel_current_thread_has_thread[hart])


@@ 131,6 132,9 @@ void kernel_choose_new_thread(u64 new_mtime, u64 hart)
        u8 thread_live = thread_runtime_is_live(thread, new_mtime);

        // try punt thread to other hart
        // all threads involved with ipfc's are core locked
        // so they are not considered
        if(!thread->IPFC_status)
        {
            u64 lowest_hart = 0;
            u64 lowest_count = U32_MAX; // don't want to overflow


@@ 224,6 228,7 @@ void kernel_choose_new_thread(u64 new_mtime, u64 hart)

    if(!found_new_thread)
    {
        rwlock_release_read(&THREAD_RUNTIME_ARRAY_LOCK);
        // Causes the KERNEL nop thread to be loaded
        return;
    }


@@ 232,6 237,8 @@ void kernel_choose_new_thread(u64 new_mtime, u64 hart)

    ThreadRuntime runtime = runtime_array[current_thread_runtimes[hart]];

    rwlock_release_read(&THREAD_RUNTIME_ARRAY_LOCK);

    kernel_current_thread_has_thread[hart] = 1;
    kernel_current_thread_tid[hart] = runtime.tid;
    kernel_current_thread_pid[hart] = runtime.pid;

M src/syscall.c => src/syscall.c +28 -2
@@ 2141,7 2141,8 @@ void syscall_IPFC_call(u64 hart, u64 mtime)

    // In order to jump directly into the ipfc thread if there is space for it
    // we must put it at the front of the round robin.
    u32 ipfc_tid = process_thread_create(parent_pid, 0, hart, S32_MAX); // This guy can move ipfc_process
    u64 ipfc_thread_runtime_index;
    u32 ipfc_tid = process_thread_create(parent_pid, 0, hart, &ipfc_thread_runtime_index); // This guy can move ipfc_process
    ipfc_process =  KERNEL_PROCESS_ARRAY[parent_pid];                   //  be wary.
    Thread* ipfc_thread = &ipfc_process->threads[ipfc_tid];



@@ 2150,6 2151,7 @@ void syscall_IPFC_call(u64 hart, u64 mtime)
    ipfc_thread->IPFC_other_pid = process_pid;
    ipfc_thread->IPFC_function_index = user_function_index;
    ipfc_thread->IPFC_handler_index = handler_index;
    ipfc_thread->IPFC_caller_runtime_index = current_thread->thread_runtime_index;

    ipfc_thread->frame.regs[10] = owned_process_index;
    ipfc_thread->frame.regs[11] = user_function_index;


@@ 2163,6 2165,8 @@ void syscall_IPFC_call(u64 hart, u64 mtime)
    current_thread->IPFC_handler_index = handler_index;
    current_thread->is_running = 0;

    try_assign_ipfc_stack(ipfc_process, ipfc_thread);

    kernel_log_user(hart,
                    kernel_current_threads[hart].process_pid,
                    kernel_current_thread_tid[hart],


@@ 2170,7 2174,29 @@ void syscall_IPFC_call(u64 hart, u64 mtime)

    rwlock_release_write(&KERNEL_PROCESS_ARRAY_RWLOCK);
    rwlock_acquire_read(&KERNEL_PROCESS_ARRAY_RWLOCK);
    kernel_choose_new_thread(mtime, hart);

    ipfc_process =  KERNEL_PROCESS_ARRAY[parent_pid];
    ipfc_thread = &ipfc_process->threads[ipfc_tid];

    if(ipfc_thread->IPFC_status == 3)
    {
        rwlock_acquire_read(&THREAD_RUNTIME_ARRAY_LOCK);
        ThreadRuntime* runtime_array = THREAD_RUNTIME_ARRAY_ALLOC.memory;

        current_thread_runtimes[hart] = ipfc_thread->thread_runtime_index;

        ThreadRuntime runtime = runtime_array[current_thread_runtimes[hart]];

        rwlock_release_read(&THREAD_RUNTIME_ARRAY_LOCK);

        kernel_current_thread_has_thread[hart] = 1;
        kernel_current_thread_tid[hart] = runtime.tid;
        kernel_current_thread_pid[hart] = runtime.pid;
    }
    else
    {
        kernel_choose_new_thread(mtime, hart);
    }
    rwlock_release_read(&KERNEL_PROCESS_ARRAY_RWLOCK);
}


M src/tempuser.c => src/tempuser.c +24 -4
@@ 393,13 393,16 @@ void thunder_windowed_application_ipfc_api_entry(u64 source_pid, u16 function_in
    __asm__("la gp, _global_pointer");
    __asm__(".option relax");

//    f64 before = AOS_H_time_get_seconds();
    rwlock_acquire_read(&thunder_lock);

    if(function_index == 0)
    {
        rwlock_release_read(&thunder_lock);
        rwlock_acquire_write(&thunder_lock);
        spinlock_acquire(&tempuser_printout_lock);
        AOS_H_printf("new window! from pid %llu\n", source_pid);
        spinlock_release(&tempuser_printout_lock);
        if(window_count + 1 < 84) // can allocate new window
        {
            u64* window_handle = static_data_1024b;


@@ 434,7 437,12 @@ void thunder_windowed_application_ipfc_api_entry(u64 source_pid, u16 function_in
                        windows[window_count-1] = temp;
                    }
                }
                else { AOS_H_printf("Failed to create consumer for PID: %llu\n", source_pid); }
                else
                {
                    spinlock_acquire(&tempuser_printout_lock);
                    AOS_H_printf("Failed to create consumer for PID: %llu\n", source_pid);
                    spinlock_release(&tempuser_printout_lock);
                }
            }
            rwlock_release_write(&thunder_lock);
            AOS_IPFC_return(1);


@@ 449,7 457,9 @@ void thunder_windowed_application_ipfc_api_entry(u64 source_pid, u16 function_in
    {
        u64* window_handle_pointer = static_data_1024b;
        u64 window_handle = *window_handle_pointer;
        spinlock_acquire(&tempuser_printout_lock);
        AOS_H_printf("destroy window with handle=%llu! from pid %llu\n", window_handle, source_pid);
        spinlock_release(&tempuser_printout_lock);
        // destroy thingy

        rwlock_release_read(&thunder_lock);


@@ 483,7 493,9 @@ void thunder_windowed_application_ipfc_api_entry(u64 source_pid, u16 function_in
    }
    else if(function_index == 2)
    {
        AOS_H_printf("get window surfaces! from pid %llu\n", source_pid);
//        spinlock_acquire(&tempuser_printout_lock);
//        AOS_H_printf("get window surfaces! from pid %llu\n", source_pid);
//        spinlock_release(&tempuser_printout_lock);
        u64 window_handle;
        {
            u64* window_handle_pointer = static_data_1024b;


@@ 499,6 511,10 @@ void thunder_windowed_application_ipfc_api_entry(u64 source_pid, u16 function_in

            copy_to[0] = windows[i].other_surface_slot;
            rwlock_release_read(&thunder_lock);
//            f64 after = AOS_H_time_get_seconds();
//            spinlock_acquire(&tempuser_printout_lock);
//            AOS_H_printf("total ipfc time is %lf ms\n", (after-before) * 1000.0);
//            spinlock_release(&tempuser_printout_lock);
            AOS_IPFC_return(1);
        }
        rwlock_release_read(&thunder_lock);


@@ 521,7 537,7 @@ void program_loader_program(u64 drive1_partitions_directory)
    AOS_stream_put(0, print_text, strlen(print_text));

    spinlock_create(&tempuser_printout_lock);
    spinlock_create(&thunder_lock);
    rwlock_create(&thunder_lock);
    rwlock_acquire_read(&thunder_lock);
    render_work_semaphore = AOS_semaphore_create(0, THREAD_COUNT * JOBS_PER_THREAD);
    render_work_done_semaphore = AOS_semaphore_create(0, 1);


@@ 855,7 871,11 @@ while(1) {
                    }
                }
                else
                { AOS_H_printf("frame has been dropped\n"); }
                {
                    spinlock_acquire(&tempuser_printout_lock);
                    AOS_H_printf("frame has been dropped\n");
                    spinlock_release(&tempuser_printout_lock);
                }

                // do move, not related to consumers
                if(is_moving_window && i + 1 == window_count)