~jonn/bed

e77e86d980b43424a8e4f568e3ce6435d83cafc1 — Jonn 3 years ago
Problem: no easy to use binary stream editor

Solution:

 - Using Erlang features
 - Design a binary stream editor system (see README.md)
 - Start implementation, lay out necessary modules, hinting at the
	supervision tree.
A  => .gitignore +19 -0
@@ 1,19 @@
.rebar3
_*
.eunit
*.o
*.beam
*.plt
*.swp
*.swo
.erlang.cookie
ebin
log
erl_crash.dump
.rebar
logs
_build
.idea
*.iml
rebar3.crashdump
*~

A  => LICENSE +13 -0
@@ 1,13 @@
           DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
                   Version 2, December 2004
 
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>

Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
 
           DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
  TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

 0. You just DO WHAT THE FUCK YOU WANT TO.

A  => README.md +54 -0
@@ 1,54 @@
# Bed: Binary Editor

## Problem

There is no way to reasonably quickly manipulate binaries

## Objectives

 - Simple binary manipulation with ergonomic syntax
 - Work with arbitrary chunks and paddings
 - Streaming support
 - Saving recipes for later use and composition

## Requirements

 1.PMAP: `bed` shall allow for writing pmap (parallel map) instructions over binary data
 2.PIPE: `bed` shall be compatible with UNIX-pipe streaming
 3.SPLT: `bed` shall have capabilities to produce binary and text outputs
 4.CMPS: `bed` shall have utility to compose instructions
 5.SCRT: `bed` shall provide a facility to save instructions or their compositions into script files

### Commentary

 1.PMAP: For mapping part of the pipeline, we want to parallelise it so that as many cores as possible are used.
         Interplay with 5.SCRT is as follows:
         whenever a user submits more than one _map instruction_, map instructions are composed into one instruction first and then executed in parallel.
 2.PIPE: We pick UNIX pipes because that's the way sed and awk work, but we will aim to write code in such a way that `bed` is extensible with other sorts of streaming (raw tcp, ...).
 3.SPLT: Plain text output will not be sufficient for code generation, as we'll need to have
 4.CMPS: This is covered in 1.PMAP, but we should explicitly mention that we'll execute instructions in the order submitted by the user
 5.SCRT: Make `--save` flag or something...

## User stories and format

### Basic usage

 - `abcdefgh>abcdefg` transforms to non-byte-aligned bitstring
 - `abcdefgh>abcdefg|:0` transforms to byte-aligned binary (`|:0` means "byte-pad the end with zeroes")
 - `abcdefgh>abcdefg|0:` transforms to byte-aligned binary (`|0:` means "byte-pad the beginning with zeroes")
 - `abcdefgh>abcdefg|0:9` transforms to 9-bit-aligned bitstring (I don't know why anyone would need it)
 - `abcdefgh>abcdefg|"the world wonders":` uses ASCII padding in front, produces byte-aligned text

### Several rules

 - `1bcd>d;a1cd>cd;abcd>dcba|:1`
        - keeps last bit of quartet if the first bit is 1
        - keeps last two bits of quartet if the second bit is 1
        - reverses the quartet otherwise
        - pads with ones in the end

### Roadmap

In the same way that we can't support folds, we can't pmap non-aligned inputs; this is why a rule like `0ab>;a1cd>d1ca;ab0d>0000;abcd>dcba;|:1` is currently not possible.

When we add a sequential backend for folds, we'll also add support for non-aligned rules.
\ No newline at end of file

A  => apps/bed/src/bed.app.src +15 -0
@@ 1,15 @@
%% Application resource file for the `bed' OTP application.
%% `bed_app' is the application callback module; only kernel and stdlib
%% have to be running before `bed' is started.
{application, bed,
 [{description, "An OTP application"},
  {vsn, "0.1.0"},
  {registered, []},
  {mod, {bed_app, []}},
  {applications,
   [kernel,
    stdlib
   ]},
  {env,[]},
  {modules, []},

  %% Must agree with the LICENSE file at the repository root, which is the
  %% WTFPL version 2 text (SPDX identifier "WTFPL"), not Apache 2.0.
  {licenses, ["WTFPL"]},
  {links, []}
 ]}.

A  => apps/bed/src/bed_app.erl +18 -0
@@ 1,18 @@
%%%-------------------------------------------------------------------
%% @doc bed public API
%% @end
%%%-------------------------------------------------------------------

-module(bed_app).

-behaviour(application).

-export([start/2, stop/1]).

%% @doc Application start callback. Starts the top-level supervisor and
%% returns its start result unchanged; neither the start type nor the
%% start arguments are inspected.
start(_Type, _Args) ->
    SupervisorResult = bed_sup:start_link(),
    SupervisorResult.

%% @doc Application stop callback. Nothing is torn down here: the final
%% application state is ignored and `ok' is returned.
stop(_FinalState) -> ok.

%% internal functions

A  => apps/bed/src/bed_io_srv.erl +32 -0
@@ 1,32 @@
-module(bed_io_srv).

-behaviour(gen_server).

-export([start_link/0]).
-export([init/1, handle_call/3, handle_cast/2]).
-export([receive_worker_list_change/2]).

%% Server state of bed_io_srv.
%%   sofo_pool_pids - set of pids, empty by default
%%                    (presumably the known bed_sofo_pool workers; nothing
%%                    in this module populates it yet)
%%   chunk_inbox    - list of binaries, empty by default
-record(state, {
    sofo_pool_pids = sets:new() :: sets:set(pid()),
    chunk_inbox = [] :: [binary()]
}).

%% @doc Starts the server linked to the calling process and registers it
%% locally under the module name. `init/1' receives an empty list and no
%% gen_server options are passed.
start_link() ->
    RegisteredName = {local, ?MODULE},
    gen_server:start_link(RegisteredName, ?MODULE, [], []).

%% @doc gen_server init callback. Asks bed_sofo_pool to deliver worker-list
%% changes through `receive_worker_list_change' in this module, then starts
%% with a default (empty) state. The subscription result is not inspected.
init(_Args) ->
    Callback = {?MODULE, receive_worker_list_change},
    bed_sofo_pool:subscribe_to_worker_list_changes_via(Callback),
    {ok, #state{}}.

%% @doc Forwards a worker-list change to the locally registered server as a
%% synchronous call and returns whatever the server replies.
-spec receive_worker_list_change(Event :: enter | leave, Pids :: sets:set(pid())) ->
                                    term().
receive_worker_list_change(Event, Pids) ->
    Request = {receive_worker_list_change, {Event, Pids}},
    gen_server:call(?MODULE, Request).

%% @doc Synchronous request handler.
%%
%% A worker-list `enter' notification is recognised (the state must be a
%% `#state{}' record for this clause to match) but is not implemented yet:
%% it is answered with `{error, undefined}'. Every other request is answered
%% with `{error, nop}'. The state is never modified.
%%
%% The pid delta and the current pid set are bound with a leading underscore
%% because the stub does not use them yet; this keeps the compiler from
%% warning about unused variables.
handle_call({receive_worker_list_change, {enter, _PidsDelta}},
            _From,
            State0 = #state{sofo_pool_pids = _Pids0}) ->
    {reply, {error, undefined}, State0};
handle_call(_Request, _From, S0) ->
    {reply, {error, nop}, S0}.

%% @doc Asynchronous request handler: every cast is ignored and the state is
%% carried over unchanged.
handle_cast(_Msg, State) -> {noreply, State}.

A  => apps/bed/src/bed_sofo_pool.erl +9 -0
@@ 1,9 @@
-module(bed_sofo_pool).

-export([subscribe_to_worker_list_changes_via/1]).

%% @doc Registers a `{Module, Function}' callback for worker-list changes.
%%
%% Not implemented yet: a two-element tuple is accepted but answered with
%% `{error, undefined}'; anything else is rejected with `{error, badarg}'.
%% The tuple elements are underscore-prefixed because the stub does not use
%% them, which avoids unused-variable compiler warnings.
-spec subscribe_to_worker_list_changes_via({atom(), atom()}) -> 'ok' | {'error', atom()}.
subscribe_to_worker_list_changes_via({_Module, _Function}) ->
    {error, undefined};
subscribe_to_worker_list_changes_via(_) ->
    {error, badarg}.

A  => apps/bed/src/bed_sofo_worker.erl +1 -0
@@ 1,1 @@
-module(bed_sofo_worker).
\ No newline at end of file

A  => apps/bed/src/bed_sup.erl +104 -0
@@ 1,104 @@
%%%-------------------------------------------------------------------
%% @doc bed top level supervisor.
%% @end
%%%-------------------------------------------------------------------

-module(bed_sup).

-behaviour(supervisor).

-export([start_link/0]).
-export([init/1]).

-define(SERVER, ?MODULE).

%% @doc Starts the top-level supervisor, registered locally as ?SERVER.
%%
%% NOTE(review): two scratch routines run in the calling process before the
%% supervisor is started. Their results are ignored, but a failure inside
%% them (e.g. the `{ok, Module}' match in main/0) prevents the supervisor
%% from starting.
start_link() ->
    % dirty-dirty: main/0 writes local.erl, compiles it and calls local:main/0
    main(),
    % main_fan/0 runs fan_bytes/3 over a handful of sample rule widths
    main_fan(),
    supervisor:start_link({local, ?SERVER}, ?MODULE, []).

%% sup_flags() = #{strategy => strategy(),         % optional
%%                 intensity => non_neg_integer(), % optional
%%                 period => pos_integer()}        % optional
%% child_spec() = #{id => child_id(),       % mandatory
%%                  start => mfargs(),      % mandatory
%%                  restart => restart(),   % optional
%%                  shutdown => shutdown(), % optional
%%                  type => worker(),       % optional
%%                  modules => modules()}   % optional
%% Supervisor callback: no children are declared yet. The supervisor flags
%% are strategy one_for_all, intensity 0 and period 1.
init([]) ->
    {ok, {#{strategy => one_for_all, intensity => 0, period => 1}, []}}.

%% internal functions

-define(BYTE, 8).

%% Greatest common divisor of two integers. Signs are dropped first, then the
%% work is delegated to gcd_do/2 with the larger magnitude as first argument.
-spec gcd(integer(), integer()) -> non_neg_integer().
gcd(X, Y) ->
    {AbsX, AbsY} = {abs(X), abs(Y)},
    Larger = max(AbsX, AbsY),
    Smaller = min(AbsX, AbsY),
    gcd_do(Larger, Smaller).

%% Greatest common divisor of two non-negative integers, computed with the
%% remainder-based Euclidean algorithm.
%%
%% A zero second argument ends the recursion, so gcd_do(X, 0) is X (and
%% gcd_do(0, 0) is 0). The previous subtraction-based loop never terminated
%% when exactly one argument was zero, and needed a number of steps
%% proportional to the operands themselves.
-spec gcd_do(non_neg_integer(), non_neg_integer()) -> non_neg_integer().
gcd_do(X, 0) ->
    X;
gcd_do(X, Y) ->
    gcd_do(Y, X rem Y).

%% Works out how many whole bytes make up one fan-out unit for a rule that
%% consumes BitsPerRule bits at a time, then hands over to fan_bytes_do/3.
%% Workers and FH are passed through untouched.
-spec fan_bytes(BitsPerRule :: non_neg_integer(),
                Workers :: [pid()],
                FH :: file:io_device()) ->
                   {error, undefined}.
fan_bytes(BitsPerRule, Workers, FH) ->
    % With $ b_r $ the bit alignment of the rule and $ g $ the GCD function,
    % the number of bits to fan out is
    %   $ b_{fo} = b_r \frac{8}{g(b_r, 8)} $
    % so the number of bytes to fan out is
    %   $ B_{fo} = \frac{b_{fo}}{8} = \frac{b_r}{g(b_r, 8)} $
    Divisor = gcd(?BYTE, BitsPerRule),
    fan_bytes_do(BitsPerRule div Divisor, Workers, FH).

%% Placeholder for the actual fan-out: it only prints what it would do and
%% returns `{error, undefined}'.
%%
%% The data flows from the file handle to the workers, so FH is printed
%% after "From:" and Workers after "To:" (the two were swapped before).
-spec fan_bytes_do(BytesToFanOut :: non_neg_integer(),
                   Workers :: [pid()],
                   FH :: file:io_device()) ->
                      {error, undefined}.
fan_bytes_do(BytesToFanOut, Workers, FH) ->
    io:format("Fanning out ~p bytes.~nFrom: ~p~nTo: ~p~n", [BytesToFanOut, FH, Workers]),
    {error, undefined}.

%% Writes the given source lines to FileName, separated by a newline (no
%% trailing newline is added). Returns the result of file:write_file/2.
write_src(FileName, IOListSrc) ->
    file:write_file(FileName, lists:join(<<"\n">>, IOListSrc)).

% dirty-dirty
%% Scratch driver for fan_bytes/3: runs it for a few rule widths with no
%% workers and the calling process standing in for the file handle.
%% Expected bytes to fan out: 3 -> 3, 4 -> 1, 16 -> 2, 17 -> 17, 101 -> 101.
%% Returns the result of the last call.
main_fan() ->
    SampleWidths = [3, 4, 16, 17, 101],
    Results = [fan_bytes(Bits, [], self()) || Bits <- SampleWidths],
    lists:last(Results).

%% Scratch experiment in runtime code generation: writes a tiny module to
%% local.erl in the current directory, compiles it, prints its module_info
%% and calls its main/0. Returns whatever that main/0 returns.
main() ->
    ModuleName = "local",
    SourceFile = "local.erl",
    SourceLines =
        [<<"-module(local).">>, <<"-export([main/0]).">>, <<"main() -> io:format(\"hi?\").">>],
    write_src(SourceFile, SourceLines),
    % Crashes with badmatch if compilation fails.
    {ok, Compiled} = compile:file(ModuleName),
    Info = Compiled:module_info(),
    io:format("~p~n", [Info]),
    Compiled:main().

    %% AtomModuleName = local, % a function string -> atom, that strips .erl, but ok

A  => config/sys.config +3 -0
@@ 1,3 @@
[
  {bed, []}
].

A  => config/vm.args +6 -0
@@ 1,6 @@
-sname bed

-setcookie bed_cookie

+K true
+A30

A  => local.erl +3 -0
@@ 1,3 @@
-module(local).
-export([main/0]).
%% Prints the fixed text "hi?" via io:format/1.
main() ->
    io:format("hi?").
\ No newline at end of file

A  => rebar.config +13 -0
@@ 1,13 @@
{ erl_opts , [ debug_info ] } .

{ deps , [ ] } .

{ relx , [
  { release , { bed , "0.1.0" } , [ bed , sasl ] } ,
  { mode , dev } , { sys_config , "./config/sys.config" } ,
  { vm_args , "./config/vm.args" }
] } .

{ profiles , [ { prod , [ { relx , [ { mode , prod } ] } ] } ] } .

{ plugins , [ rebar3_format ] } .

A  => rebar.lock +1 -0
@@ 1,1 @@
[].