Romen's eSpace: October 2008

Tuesday 21 October 2008

Erlang Mnesia - Take 2

After my first attempt of using Erlang Mnesia, I decided to apply what I have learned so far to refactor and improve my Erlang Mnesia implementation of the Address database and Data Access Layer (DAL).

Redesigning The Database

This time I de-normalised part of the database design -

to duplicate the hierarchy_order field from address_field_type to address_field table
removed the record id fields from the 3 entities:
- address_field_type is a reference data table with a small amount of data. I implicitly use locale_country + hierarchy_order as a combined key
- address and address_field both have the location_code field which is unique for each record, so I used location_code as their primary key

The address.hrl file:

%% Reference data: one row per kind of address field in a locale/country.
%% locale_country is the first field and therefore the Mnesia key; create_db/0
%% declares this table as a bag, so several types can share one locale_country.
-record(address_field_type, {
 locale_country,
 name,
 default_value,
 hierarchy_order,
 display_order,
 validation_rules,
 suffix} ).
 
%% One address component value. location_code is the first field and
%% therefore the Mnesia key.
-record(address_field, {
 location_code,
 value,
 locale_country,  %% denormalised from address_field_type
 hierarchy_order  %% denormalised from address_field_type
 }).
 
%% An address. location_code is the first field and therefore the Mnesia key.
-record(address, {
 location_code,
 status
 }).
 
%% Join record linking one address to one of its address fields; it holds the
%% location_code of each side.
-record(address_addressField, {
 address_locationCode,
 address_field_locationCode
 }).

Creating the database:

%% Creates the four RAM-resident Mnesia tables of the Address database on the
%% local node. Mnesia must already be running.
%%
%% Returns the result of the last mnesia:create_table/2 call
%% ({atomic, ok} | {aborted, Reason}); the results of the earlier calls are
%% not checked.
create_db() ->
    %% node() instead of the literal nonode@nohost: identical on an unnamed
    %% node, and it keeps working when the node was started with a name.
    Nodes = [node()],
    mnesia:create_table(address_field_type,
        [
            {type, bag}, %% bag, because there is no single-field primary key now
            {ram_copies, Nodes},
            {index, [hierarchy_order]},
            {attributes, record_info(fields, address_field_type)}
        ]),
    mnesia:create_table(address_field,
        [
            {ram_copies, Nodes},
            {attributes, record_info(fields, address_field)}
        ]),
    mnesia:create_table(address,
        [
            {ram_copies, Nodes},
            {attributes, record_info(fields, address)}
        ]),
    mnesia:create_table(address_addressField,
        [
            {type, bag}, %% one address links to many fields
            {ram_copies, Nodes},
            {index, [address_field_locationCode]},
            {attributes, record_info(fields, address_addressField)}
        ]).

The above changes do reduce the functionality of the Address application very slightly, but it's a worthwhile trade-off as it simplifies the design - now I don't need the oids table/record any more, along with all the functions and code revolving around the oids.

Also, by de-normalising, i.e. duplicating the hierarchy_order field into the address_field table, the sorting of the address_field list is made easy and efficient (without having to look up the database in every iteration). This is demonstrated in the code below.

Simplified Sorting

The sorting of address_field list has reduced from 3 functions previously to the use of lists:keysort/2 due to the duplication of the hierarchy_order field from address_field_type into address_field in the data/entity model.

%% Orders a list of address_field records by ascending hierarchy_order.
%% An address_field record is the tuple
%% {address_field, LocationCode, Value, LocaleCountry, HierarchyOrder},
%% which puts hierarchy_order at tuple position 5.
sort_afs(Afs) when is_list(Afs) ->
    HierarchyOrderPos = 5,
    lists:keysort(HierarchyOrderPos, Afs).

Simplified Data Insertion

By skipping any record id handling, the code for insertion of records is considerably simplified. This is most evident from the insert_address/2 function (compared to the previous version):

%% Writes one address_field_type record to Mnesia inside a transaction and
%% returns the record unchanged.
%%
%% The transaction result is matched against {atomic, ok}, so an aborted
%% write crashes with badmatch instead of being silently reported as success.
insert_aft(Aft) when is_record(Aft, address_field_type) ->
    {atomic, ok} = mnesia:transaction(fun() -> mnesia:write(Aft) end),
    Aft.
 
%% Stores an address together with its address fields.
%%
%% Location codes are generated first by address:generate_location_code/2.
%% Then, inside one transaction, the address is written, and for every field
%% the field itself plus one address_addressField link record is written.
%%
%% Returns the mnesia:transaction/1 result; on success that is
%% {atomic, {Address, Fields}} with the location codes filled in.
insert_address(A, Afs) when is_record(A, address), is_list(Afs) ->
    {Address, Fields} = address:generate_location_code(A, Afs),
    WriteFieldAndLink = fun(Field) ->
        %% insert/update the field itself ...
        mnesia:write(Field),
        %% ... then the link between the address and this field
        Link = #address_addressField{
            address_locationCode = Address#address.location_code,
            address_field_locationCode = Field#address_field.location_code
        },
        mnesia:write(Link)
    end,
    mnesia:transaction(fun() ->
        mnesia:write(Address),
        lists:foreach(WriteFieldAndLink, Fields),
        {Address, Fields}
    end).

Simplified Data Queries

By using dirty_read and dirty_index_read functions unnecessary transactions are avoided.

%% Returns the list of address_field_type records stored under the given
%% locale/country key. Uses a dirty read, so no transaction is involved.
find_afts(LocaleCountry) when is_list(LocaleCountry) ->
    mnesia:dirty_read(address_field_type, LocaleCountry).

%% Returns the address record whose key is Id, using a dirty read.
%% Crashes with badmatch unless exactly one record is found.
find_address(Id) when is_integer(Id) ->
    [Address] = mnesia:dirty_read(address, Id),
    Address.
 
%% Returns the address_field record whose key is Id, using a dirty read.
%% Crashes with badmatch unless exactly one record is found.
find_address_field(Id) when is_integer(Id) ->
    [Field] = mnesia:dirty_read(address_field, Id),
    Field.
 
%% Returns the address-field location codes linked to the address A_code.
%% The dirty read yields address_addressField tuples; element 3 of each one
%% is the address_field_locationCode.
find_address_field_codes(A_code) ->
    Links = mnesia:dirty_read(address_addressField, A_code),
    lists:map(fun(Link) -> element(3, Link) end, Links).
 
%% Returns the location codes of all addresses linked to the address field
%% Af_code, found with mnesia:select/2 inside a transaction.
%% Crashes with badmatch if the transaction aborts.
find_address_codes(Af_code) ->
    %% '$1' captures the address side of every link whose field side matches.
    MatchHead = #address_addressField{
        address_locationCode = '$1',
        address_field_locationCode = Af_code
    },
    MatchSpec = [{MatchHead, [], ['$1']}],
    Select = fun() -> mnesia:select(address_addressField, MatchSpec) end,
    {atomic, Codes} = mnesia:transaction(Select),
    Codes.
 
%% Same lookup as find_address_codes/1, expressed as a QLC query over the
%% address_addressField table and evaluated inside a transaction.
%% Crashes with badmatch if the transaction aborts.
find_address_codes2(Af_code) ->
    Lookup = fun() ->
        qlc:e(qlc:q([Link#address_addressField.address_locationCode
                     || Link <- mnesia:table(address_addressField),
                        Link#address_addressField.address_field_locationCode == Af_code]))
    end,
    {atomic, Codes} = mnesia:transaction(Lookup),
    Codes.
 
%% Same lookup as find_address_codes/1, done as a dirty read through the
%% index on address_field_locationCode (no transaction).
find_address_codes3(Af_code) ->
    IndexPos = #address_addressField.address_field_locationCode,
    Links = mnesia:dirty_index_read(address_addressField, Af_code, IndexPos),
    %% element 2 of each link tuple is the address_locationCode
    [element(2, Link) || Link <- Links].

In the above code, find_address_codes/1, find_address_codes2/1, find_address_codes3/1 do the same thing but are implemented using different approaches. The dirty read one is the simplest.

Conclusion

Just by thinking in Erlang, the resulting database design has been simplified - fewer tables and fields; the source lines of code (SLOC) have been reduced by 30%; most importantly, the programming logic is now simpler and easier to read/maintain.

However, this does not mean that Erlang and Mnesia are the best way to implement this kind of database applications (where normalisation is important for the business logic).

Saturday 18 October 2008

Erlang Mnesia

Carrying out my Erlang learning plan, I embarked on the porting exercise of my Address DAO package into Erlang Mnesia. In fact, it is more than porting of the data access functions. It is also a porting of the Address database itself from RDBMS into Mnesia.

Creating the Mnesia Schema

The Entity Relationship Diagram (ERD) of the database schema is shown below.

To create the tables in Mnesia, I defined the following records, each corresponding to a table.

%% Reference data describing one kind of address field; id is the first
%% field and therefore the Mnesia key.
-record(address_field_type, {id,
 name,
 locale_country,
 default_value,
 hierarchy_order,
 display_order,
 validation_rules,
 suffix} ).
 
%% One address component value; address_field_type_id refers to the id of an
%% address_field_type record.
-record(address_field, {id,
 value,
 address_field_type_id,
 location_code}).
 
%% An address; id is the first field and therefore the Mnesia key.
-record(address, {id,
 status,
 location_code}).
 
%% Join record linking an address id to an address_field id.
-record(address_addressField, {
 address_id,
 address_field_id
 }).
%% Id counter: name is the table the counter belongs to, id is the last value
%% handed out for it (see generate_oid/1).
-record(oids, {
 name,
 id
 }).

Note that there is an extra record oids. This record/table is required because Mnesia does not support auto-incrementing fields usually used as primary key generation. Therefore this extra table is used to store the current key value - similar to Oracle's sequence.

The creation of the tables is shown below. The oids table is also populated with seed values.

%% Creates the Mnesia tables of the Address database on the local node and
%% seeds the oids table with a zero counter for each table that needs
%% generated ids. Mnesia must already be running.
%%
%% Returns the result of the seeding transaction
%% ({atomic, ok} | {aborted, Reason}); the results of the create_table calls
%% are not checked.
create_db() ->
    %% node() instead of the literal nonode@nohost: identical on an unnamed
    %% node, and it keeps working when the node was started with a name.
    Nodes = [node()],
    mnesia:create_table(address_field_type,
        [
            {ram_copies, Nodes},
            {attributes, record_info(fields, address_field_type)}
        ]),
    mnesia:create_table(address_field,
        [
            {ram_copies, Nodes},
            {index, [location_code]},
            {attributes, record_info(fields, address_field)}
        ]),
    mnesia:create_table(address,
        [
            {ram_copies, Nodes},
            {index, [location_code]},
            {attributes, record_info(fields, address)}
        ]),
    mnesia:create_table(address_addressField,
        [
            {type, bag}, %% one address links to many fields
            {ram_copies, Nodes},
            {index, [address_field_id]},
            {attributes, record_info(fields, address_addressField)}
        ]),
    %% No storage option is given here, so Mnesia's default applies.
    mnesia:create_table(oids,
        [
            {attributes, record_info(fields, oids)}
        ]),
    Seed = fun() ->
        mnesia:write(#oids{name = address, id = 0}),
        mnesia:write(#oids{name = address_field_type, id = 0}),
        mnesia:write(#oids{name = address_field, id = 0})
    end,
    mnesia:transaction(Seed).

Simple CRUD

The address_field_type is a table for storing reference data, which does not rely on other tables. So the C and R of the CRUD are very simple. From the address_db.erl file:

%% Hands out the next id for TableName from the counter row kept in the oids
%% table. The row is read with a write lock, incremented and written back
%% within one transaction.
%% Returns {atomic, NewId} or {aborted, Reason}.
generate_oid(TableName) when is_atom(TableName) ->
    mnesia:transaction(fun() ->
        [Counter] = mnesia:read(oids, TableName, write),
        NextId = Counter#oids.id + 1,
        %% records are immutable: write back an updated copy of the row
        mnesia:write(Counter#oids{id = NextId}),
        NextId
    end).
  
%% Stores an address_field_type record under a freshly generated id.
%% Returns the mnesia:transaction/1 result; on success that is
%% {atomic, Record} where Record is the input with its id field filled in.
insert_aft(Aft) when is_record(Aft, address_field_type) ->
    mnesia:transaction(fun() ->
        %% nested transaction: generate_oid/1 runs its own mnesia:transaction
        {atomic, NewId} = generate_oid(address_field_type),
        Stored = Aft#address_field_type{id = NewId},
        mnesia:write(Stored),
        Stored
    end).

%% Returns the address_field_type record with the given id, read inside a
%% transaction. Crashes with badmatch if the transaction aborts or if the
%% read does not return exactly one record.
find_aft(Id) when is_integer(Id) ->
    Read = fun() -> mnesia:read(address_field_type, Id) end,
    {atomic, [Aft]} = mnesia:transaction(Read),
    Aft.

There are a few things worth noting:

Although the mnesia:read and mnesia:select functions can be read-only, they still need to be wrapped in a function and called by mnesia:transaction/1. This does not make sense to me.
The mnesia:transaction(Fun) returns {aborted, Reason} or {atomic, Result} where Result is whatever the return result of the Fun is. Therefore, the insert_aft/1 above returns New - the new address_field_type record with the id field populated.
Erlang is a single-assignment language - i.e. a variable can only be assigned a value once. Therefore, if I want to assign the Aft#address_field_type.id, I have to create a whole new address_field_type record as in the insert_aft/1 above. I have to say, I am not a fan of this single assignment - it defeats half of the purpose of having variables.

A Slightly More Complicated CRUD

The address entity relies on address_field, which in turn depends on address_field_type. Also, if address records share the same address_field values, then the address_field record should be shared and not be duplicated in the database. Therefore, the management of the address records is more complicated than that of address_field_type.

The creation of address records is shown below.

%% Stores an address together with its address fields.
%%
%% Location codes are generated first by address:generate_location_code/2.
%% Then, inside one transaction:
%%   1. every field gets an id: the id of an already stored field with the
%%      same location_code if there is one, otherwise a newly generated id;
%%   2. the address is written under a newly generated id;
%%   3. every field is written, plus one address_addressField link per field.
%%
%% Returns the mnesia:transaction/1 result; on success that is
%% {atomic, {Address, Fields}} with ids and location codes filled in.
insert_address(A, Afs) when is_record(A, address), is_list(Afs) ->
    {A2, Afs2} = address:generate_location_code(A, Afs),
    mnesia:transaction(fun() ->
        %% Accumulate with cons and reverse once at the end (O(n)), rather
        %% than appending to the tail of the accumulator on every step.
        NewAfs = lists:reverse(lists:foldl(
            fun(Af, Acc) ->
                [Af#address_field{id = existing_or_new_field_id(Af)} | Acc]
            end,
            [],
            Afs2)),

        % create the new address record
        {atomic, AddressId} = generate_oid(address),
        NewA = A2#address{id = AddressId},
        mnesia:write(NewA),

        % now insert/update into address_field
        % and insert into address_addressField table
        lists:foreach(
            fun(Af) ->
                mnesia:write(Af),
                mnesia:write(#address_addressField{
                    address_id = AddressId,
                    address_field_id = Af#address_field.id
                })
            end,
            NewAfs),
        {NewA, NewAfs}
    end).

%% Returns the id of a stored address_field that has the same location_code
%% as Af, or a newly generated id when no such field is stored yet.
%% Must be called inside a transaction.
existing_or_new_field_id(Af) ->
    Stored = mnesia:select(address_field, [{
        #address_field{id = '$1',
                       location_code = Af#address_field.location_code,
                       _ = '_'},
        [], ['$1']}]),
    case Stored of
        [] -> % Af not in DB, so allocate an id for it
            {atomic, Id} = generate_oid(address_field),
            Id;
        [Id | _] -> % Af already exists in database
            Id
    end.

From the address.erl file:

%% Sorts address_field records by the hierarchy_order of their
%% address_field_type, lowest first. Fields whose types compare equal keep
%% their relative input order.
%%
%% Each field's type is fetched from the database exactly once, up front;
%% the quick sort then runs on {Type, Field} pairs, so no comparison touches
%% the database. Lists of zero or one element are returned without a lookup.
sort_afs([]) -> [];
sort_afs([Af]) when is_record(Af, address_field) -> [Af];
sort_afs([First, _ | _] = Afs) when is_record(First, address_field) ->
    Decorated = [{address_db:find_aft(Af#address_field.address_field_type_id), Af}
                 || Af <- Afs],
    [Af || {_Aft, Af} <- sort_decorated_afs(Decorated)].

%% Quick sort over {AddressFieldType, AddressField} pairs, ordered with
%% compare_aft/2 on the type half of each pair.
sort_decorated_afs([]) -> [];
sort_decorated_afs([{PivotAft, _} = Pivot | Tail]) ->
    {Before, NotBefore} = lists:partition(
        fun({Aft, _}) -> compare_aft(Aft, PivotAft) < 0 end,
        Tail),
    sort_decorated_afs(Before) ++ [Pivot] ++ sort_decorated_afs(NotBefore).

%% Compares two address_field_type records by hierarchy_order.
%% Returns the difference Order1 - Order2: negative when Aft1 sorts first,
%% zero when they are equal, positive when Aft2 sorts first.
compare_aft(Aft1, Aft2) when
      is_record(Aft1, address_field_type), is_record(Aft2, address_field_type) ->
    Order1 = Aft1#address_field_type.hierarchy_order,
    Order2 = Aft2#address_field_type.hierarchy_order,
    Order1 - Order2.

%% Assigns location codes to an address and its (non-empty) list of fields.
%% The fields are coded by generate_location_code/1; the address takes the
%% location_code of the first field in that result.
%% Returns {Address, Fields} with the location codes filled in.
generate_location_code(A, [_ | _] = AddressFields) when is_record(A, address) ->
    %% non-empty input is required by the pattern above, not by length/1
    [FirstAf | _] = Afs = generate_location_code(AddressFields),
    {A#address{location_code = FirstAf#address_field.location_code}, Afs}.

%% Assigns a location code to every field in Afs: the fields are sorted with
%% sort_afs/1, reversed, and handed to generate_location_code/3 with empty
%% accumulators.
generate_location_code(Afs) ->
    Sorted = sort_afs(Afs),
    generate_location_code(lists:reverse(Sorted), [], []).
 
%% Walks the field list, giving each field the hash (erlang:phash2/1) of its
%% own value prepended to the concatenated values of the fields processed
%% before it. Coded fields are pushed onto the front of the result, so the
%% result is in the reverse of the input order.
generate_location_code([], _Suffix, Coded) ->
    Coded;
generate_location_code([Af | Rest], Suffix, Coded) when is_record(Af, address_field) ->
    Text = Af#address_field.value ++ Suffix,
    CodedAf = Af#address_field{location_code = erlang:phash2(Text)},
    generate_location_code(Rest, Text, [CodedAf | Coded]).

The following functions are queries from the address_db.erl file:

%% Returns the address record with the given id, read inside a transaction.
%% Crashes with badmatch if the transaction aborts or if the read does not
%% return exactly one record.
find_address(Id) when is_integer(Id) ->
    Read = fun() -> mnesia:read(address, Id) end,
    {atomic, [Address]} = mnesia:transaction(Read),
    Address.
 
%% Returns the address_field record with the given id, read inside a
%% transaction. Crashes with badmatch if the transaction aborts or if the
%% read does not return exactly one record.
find_address_field(Id) when is_integer(Id) ->
    Read = fun() -> mnesia:read(address_field, Id) end,
    {atomic, [Field]} = mnesia:transaction(Read),
    Field.
 
%% Reads the address_addressField link records stored under address id A_id.
%% Returns the raw mnesia:transaction/1 result:
%% {atomic, Links} or {aborted, Reason}.
find_address_field_ids(A_id) ->
    Read = fun() -> mnesia:read(address_addressField, A_id) end,
    mnesia:transaction(Read).
 
%% Finds the ids of all addresses linked to address field Af_id, using
%% mnesia:select/2. Returns the raw mnesia:transaction/1 result:
%% {atomic, AddressIds} or {aborted, Reason}.
find_address_ids(Af_id) ->
    %% '$1' captures the address side of every link whose field side matches.
    MatchHead = #address_addressField{address_id = '$1', address_field_id = Af_id},
    MatchSpec = [{MatchHead, [], ['$1']}],
    mnesia:transaction(fun() -> mnesia:select(address_addressField, MatchSpec) end).
 
%% Same lookup as find_address_ids/1, expressed as a QLC query over the
%% address_addressField table. Returns the raw mnesia:transaction/1 result:
%% {atomic, AddressIds} or {aborted, Reason}.
find_address_ids2(Af_id) ->
    mnesia:transaction(fun() ->
        qlc:e(qlc:q([Link#address_addressField.address_id
                     || Link <- mnesia:table(address_addressField),
                        Link#address_addressField.address_field_id == Af_id]))
    end).

Things that I want to highlight from the above are:

Unlike Java or C#, Erlang does not have a sortable list out-of-the-box. Instead, I had to write functions that sort my list of address_field records - these are: sort_afs, compare_aft
When searching using the primary key field, mnesia:read is used; when searching using other fields, either use mnesia:select or QLC. In the above code, the functions find_address_ids/1 and find_address_ids2/1 do the same thing. However, the results (a list) given by find_address_ids2/1 are in reverse order.
If a record with the same primary key already exists in the table, mnesia:write either replaces it (if the table type is set) or adds the new record alongside it under the same key (if the table type is bag).
Again, due to the single assignment rule, the lists:append(List, AnotherList) does not change the List; instead, the new list is the return value of lists:append/2. This is just counter-intuitive.

Related Posts:

Erlang Mnesia - Take 2

Monday 13 October 2008

Erlang ODBC

Continuing my Erlang journey, I decided to try with Erlang ODBC by porting one of my toy applications. Perhaps not a wise choice since working with database (especially relational database) is not a strength of Erlang, at least for now.

After almost 10 years and being spoiled by myriad of Object-Relational Mapping (ORM) frameworks, using ODBC again feels like a giant leap backward - 3 generations back to be exact (ADO.Net, ADO, ODBC). So the overall development experience is not that great. This is not to say that Erlang is not a great language, but Erlang as a platform is quite narrow in scope (this makes sense considering Erlang's strong telco heritage).

So here is my porting exercise. Note that the code below is my feeble attempt at using/learning Erlang by reading the man-pages alone, so if you have suggestions to improve it, please drop a comment. Also, it is not robust since it does not handle error conditions.

Background

The application (or component) that I am working on is a Data Access Objects (DAO) package which I originally developed in Java using various ORM frameworks and then C# using various 3rd-party ORM and ADO.NET frameworks. The data model is quite simple, consisting of 3 entities: AddressFieldType (AFT for short), AddressField (AF for short) and Address. The relationships among them are as follows:

AFT - AF has a one-to-many relationship
Address - AF has a many-to-many relationship

The partial entity relationship diagram (ERD) can be found in my previous blog.

Here, I attempt to implement some of the interfaces from the DAO package using Erlang ODBC.

The Design

Since Erlang is not an object oriented language, I cannot call my module DAO, so I call it DAL (Data Access Layer) instead borrowing from Microsoft terminology. I have two simple modules:

address_dal: implementation of some of the interfaces retrieving records from the Address database (using MySQL 5.x with ODBC driver 5.1.5 for win32 downloaded from MySQL).
orm: ORM here means ODBC-Record Mapping. There are two types of functions in this module: those that provide ODBC template functions; and those that convert ODBC returned data into Erlang Records defined in address.hrl file.

Records

I want to work with Erlang records rather than tuples returned by ODBC calls. So I created these records to represent the domain model. From the address.hrl file:

%% Domain records that the ODBC query results are converted into.

%% One kind of address field.
-record(address_field_type, {id,
 name,
 locale_country,
 default_value,
 hierarchy_order,
 display_order,
 validation_rules,
 suffix} ).
 
%% One address component value; address_field_type_id holds the id of its
%% address_field_type.
-record(address_field, {id,
 value,
 address_field_type_id,
 location_code}).
 
%% An address together with its address_field records.
%% NOTE(review): the default for address_fields is a list holding one blank
%% #address_field{} record, not an empty list - confirm that is intended.
-record(address, {id,
 status,
 location_code,
 address_fields = [#address_field{}]}).

Notice the address record has a list of address_field records, representing half of the many-to-many relationship.

ODBC Template

I need to surround my SQL queries with database connection and disconnection so that this boilerplate code does not get scattered everywhere in my business logic. Also, if an SQL query results in an error, Erlang breaks the ODBC connection (I am not sure if this is Erlang's fault or ODBC's). So I have to reconnect before my next SQL query anyway.

So in my orm.erl I have these ODBC template/boiler-plate functions:

%% ODBC connection string used by connect/0: data source name plus user id.
-define(CONNECTION_STRING, "DSN=Address-MySQL;UID=root").

%% Opens an ODBC connection with ?CONNECTION_STRING, auto-commit off and
%% scrollable cursors off. The odbc application is started first if it is not
%% listed among the running applications (the pre-R12 way of starting odbc).
%% Returns whatever odbc:connect/2 returns.
connect() ->
    Running = application:which_applications(),
    case proplists:is_defined(odbc, Running) of
        true -> ok; % already running, nothing to do
        false -> application:start(odbc)
    end,
    Options = [
        {auto_commit, off},
        {scrollable_cursor, off}
    ],
    odbc:connect(?CONNECTION_STRING, Options).
 
%% Runs one SQL query on a fresh ODBC connection and returns the result
%% tuple of odbc:sql_query/2 unchanged.
%% The connection is closed in an `after` clause, so it is released even
%% when the query call raises or exits.
sql_query(Query) ->
    {ok, Ref} = connect(),
    try
        odbc:sql_query(Ref, Query)
    after
        odbc:disconnect(Ref)
    end.
 
%% Runs one parameterised SQL query on a fresh ODBC connection and returns
%% the result tuple of odbc:param_query/3 unchanged.
%% The connection is closed in an `after` clause, so it is released even
%% when the query call raises or exits.
param_query(Query, Params) ->
    {ok, Ref} = connect(),
    try
        odbc:param_query(Ref, Query, Params)
    after
        odbc:disconnect(Ref)
    end.

Note that Erlang ODBC is based on ODBC v3 which supports connection pooling which has been turned on. So surrounding the SQL queries with connection and disconnection should not incur performance penalty (although I have not explicitly tested it).

One thing I hate about Erlang's odbc:connect() is that it does not give any comprehensive error reasons if the connection fails - it always says "No SQL-driver information available. Connection to database failed." Again, I am not sure if this is Erlang's or ODBC's fault.

Converting into Records

The ODBC query functions return the results as tuples or lists of tuples. I want to convert them into records so that I can access the fields of the records more easily - e.g. #address.id.

I tried two approaches implementing the record constructor functions. The first one is more portable since it does not rely on the field position but uses column name as a clue to map to the record's fields. This is shown below from the orm.erl file mapping the AddressFieldType query results to their corresponding address_field_type record type:

%% Converts ODBC result rows into address_field_type records.
%% ColNames is the non-empty list of column names returned with the result
%% set; Rows is the list of row tuples. Returns one record per row, in row
%% order, and [] when there are no rows.
construct_aft_records([_ | _] = ColNames, Rows) when is_list(Rows) ->
    %% A comprehension keeps this linear; the empty-rows case needs no
    %% special branch because it simply yields [].
    [construct_aft_record(ColNames, Row) || Row <- Rows].

   
%% Converts one ODBC row tuple into an address_field_type record.
%% Columns are located by upper-cased column name rather than by position,
%% so the column order of the SELECT does not matter. Row must have exactly
%% one element per column name.
construct_aft_record([_ | _] = ColNames, Row)
  when is_tuple(Row), tuple_size(Row) =:= length(ColNames) ->
    %% Pair each upper-cased column name with the value in the same position.
    Map = lists:zip([string:to_upper(Name) || Name <- ColNames],
                    tuple_to_list(Row)),
    #address_field_type{
        id = extract_map_value(Map, "ADDRESSFIELDTYPEID"),
        name = extract_map_value(Map, "NAME"),
        locale_country = extract_map_value(Map, "LOCALECOUNTRY"),
        default_value = extract_map_value(Map, "DEFAULTVALUE"),
        hierarchy_order = extract_map_value(Map, "HIERARCHYORDER"),
        display_order = extract_map_value(Map, "DISPLAYORDER"),
        validation_rules = extract_map_value(Map, "VALIDATIONRULES"),
        suffix = extract_map_value(Map, "SUFFIX")
    }.

 
%% Returns the value paired with Key in Map, a list of {Key, Value} tuples.
%% Crashes with badarg when Key is absent, because lists:keysearch/3 then
%% returns false instead of {value, Tuple}.
extract_map_value(Map, Key) ->
    Found = lists:keysearch(Key, 1, Map), % {value, {Key, Value}} | false
    Pair = element(2, Found),
    element(2, Pair).

The second approach is lazier as it relies on the field position as specified in the SELECT SQL statement. This is shown below from the orm.erl file.

%% Runs an address query and returns a list of address records, each with
%% its address_field records attached, in result-set order.
%%
%% Query must select the address id, status and location code as its first
%% three columns, in that order; Params are the ODBC parameters for Query.
%% All queries share one connection, which is closed in an `after` clause so
%% it is released even when a query fails or a result does not match.
find_addresses(Query, Params) ->
    {ok, Ref} = connect(),
    try
        {selected, _, Rows} = odbc:param_query(Ref, Query, Params),
        [address_from_row(Ref, Row) || Row <- Rows]
    after
        odbc:disconnect(Ref)
    end.

%% Builds one address record from an address row, fetching its fields over
%% the open connection Ref.
address_from_row(Ref, Row) ->
    AddressId = element(1, Row),
    AddressStatus = element(2, Row),
    AddressLocationCode = element(3, Row),
    #address{
        id = AddressId,
        status = AddressStatus,
        location_code = AddressLocationCode,
        address_fields = address_fields_of(Ref, AddressId)
    }.

%% Fetches the address_field records linked to AddressId, ordered by the
%% hierarchyOrder of their field type.
address_fields_of(Ref, AddressId) ->
    {selected, _, AfRows} = odbc:param_query(Ref,
        "SELECT af.addressFieldId, af.value, af.addressFieldTypeId,"
        "       af.locationCode, aft.hierarchyOrder"
        " FROM Address.addressField as af, Address.AddressFieldType as aft,"
        "      Address.address_addressField as aaf"
        " WHERE aaf.addressId=?"
        "   AND af.addressFieldId=aaf.addressFieldId"
        "   AND aft.addressFieldTypeId=af.addressFieldTypeId"
        " ORDER BY aft.hierarchyOrder",
        [{sql_integer, [AddressId]}]
    ),
    [address_field_from_row(AfRow) || AfRow <- AfRows].

%% Builds one address_field record from the first four columns of a row
%% returned by the query in address_fields_of/2.
address_field_from_row(AfRow) ->
    #address_field{
        id = element(1, AfRow),
        value = element(2, AfRow),
        address_field_type_id = element(3, AfRow),
        location_code = element(4, AfRow)
    }.

Implementing the DAL Interfaces

Now that the boiler-plate code is done, I can implement the DAL interface methods, oops! I mean functions. Here is the address_dal.erl file.

%% Implementation of the Address Data Access Layer (DAL) interfaces.
-module(address_dal).
-export([find_aft/0, find_aft/1]).
-export([find_addresses_in_af/1, find_addresses_in_location/1]).
-include("address.hrl").
 
%% Returns every address field type as a list of address_field_type records.
%% SELECT * is fine here because orm:construct_aft_records/2 maps columns by
%% name. Crashes with badmatch if the query does not return {selected, _, _}.
find_aft() ->
    Result = orm:sql_query("SELECT * from Address.AddressFieldType"),
    {selected, ColNames, Rows} = Result,
    orm:construct_aft_records(ColNames, Rows).
 
%% Returns the address field types of one locale/country as a list of
%% address_field_type records. LocaleCountry is passed as a varchar(64)
%% query parameter. Crashes with badmatch if the query does not return
%% {selected, _, _}.
find_aft(LocaleCountry) ->
    Query = "SELECT * from Address.AddressFieldType "
            "WHERE localeCountry=?",
    Params = [{{sql_varchar, 64}, [LocaleCountry]}],
    {selected, ColNames, Rows} = orm:param_query(Query, Params),
    orm:construct_aft_records(ColNames, Rows).


%% Returns the addresses linked to the address field AddressFieldId.
%% The column order id, status, location code is what orm:find_addresses/2
%% expects.
find_addresses_in_af(AddressFieldId) ->
    Query =
        "SELECT a.addressID, a.status, a.locationCode"
        " FROM Address.Address as a, Address.Address_AddressField as aaf"
        " WHERE aaf.ADDRESSFIELDID = ?"
        " AND a.ADDRESSID=aaf.ADDRESSID",
    Params = [{sql_integer, [AddressFieldId]}],
    orm:find_addresses(Query, Params).
 
%% Returns the addresses that contain an address field with the given
%% location code.
%% LocationCode has to be passed as a string because of the ODBC to Erlang
%% datatype mapping for the numeric(32,0) parameter - hence the is_list/1
%% guard. The column order id, status, location code is what
%% orm:find_addresses/2 expects.
find_addresses_in_location(LocationCode) when is_list(LocationCode) ->
    Query =
        "SELECT a.addressID, a.status, a.locationCode"
        " FROM Address.Address as a, Address.AddressField as af, Address.Address_AddressField as aaf"
        " WHERE af.LOCATIONCODE = ?"
        "   AND a.ADDRESSID=aaf.ADDRESSID"
        "   AND af.ADDRESSFIELDID=aaf.ADDRESSFIELDID",
    Params = [{{sql_numeric, 32, 0}, [LocationCode]}],
    orm:find_addresses(Query, Params).

Notice that in the find_aft functions I can use 'SELECT *...' because the orm:construct_aft_records/2 function uses the column names to map to the address_field_type record; on the other hand, the find_addresses... functions have to use 'SELECT a.addressID, a.status, a.locationCode...' and in that strict order because the orm:find_addresses/2 function expects the columns to be in those positions.

Thursday 9 October 2008

Dealing with Databases in Erlang

Correction: Thanks to Brandon's comment below, there is also Erlang ODBC which comes with the OTP distribution. This is the 'official' gateway to the RDBMS world. I had a go at it in this post. There is also CouchDB from Apache, which is still in incubation.

Currently there are basically two ways to use database in Erlang - Mnesia and ErlyDB. There is also the prospect of edbc. But nothing is available yet from the project.

Mnesia is the native DBMS in Erlang. It is bundled with Erlang/OTP distribution and is written in Erlang as well. It was originally developed for telco switching applications by Ericsson. Therefore, it boasts a lot of non-functional features - in-memory database, distributed database, high performance (for read), making changes without downtime, etc. However, all these benefits are at the cost of sacrificing many of the basic functionality of a traditional RDBMS - such as referential integrity check, data type validation/check, normalisation support, triggers, stored procedures, etc.

ErlyDB is essentially Yariv's one-man effort. It's a code generator that generates the data access layer code in Erlang to work with an RDBMS (and perhaps Mnesia). The currently supported RDBMSs are MySQL and PostgreSQL. Its primary use case was ErlyWeb from the same author. It looked promising. However, the whole of ErlyWeb and ErlyDB/ErlySQL seems to have lost its momentum: the last ErlyWeb release was over a year ago; ErlyDB is no longer a project in its own right, so there is no information on its releases; promised features from Yariv's blog have not eventuated after 1 to 2 years.

So given the database landscape in Erlang, the most viable approach is to use Mnesia if you don't mind the lack of database features and tools. I will give it a try by porting my prototype Address database into Mnesia.

Sunday 5 October 2008

Consuming RESTful Service with Erlang

Having been working in the telco industry for over 10 years, I can't help feeling a bit ashamed for not having learned Erlang. Erlang was developed by Ericsson - the leading Swedish-based network equipment provider, and has been used in many of the network switching equipment produced by Ericsson. Erlang has gained more traction recently, especially in the last year or two.

Here, I write an Erlang program to consume a demo RESTful service that I developed a couple of months ago. The Erlang code is based on example by Dave Thomas - the author of Programming Ruby. The detail of the RESTful service is available in my previous post.

There are two services that I want to consume.

The first one accepts a social web site URL and scrapes the page for the person's interests in movies and music to return a list of comma-delimited keywords. For example,

http://localhost:8080/SvdemoRestful/resources/webPageKeywords?url=http://localhost/someWebPage.html

if any keywords are found, then they are returned in the body of the HTTP response as a string; otherwise, the HTTP body is empty. For example,

folk songs, pop music, chinese music ,battle, action, comedy

Notice the spaces in the above string. The second service I want to consume accepts the above string to search for any matching promotion in a mock up XML database. If any is found then the XML string will be returned in the HTTP body; otherwise, the string <Empty/> is returned in the HTTP body. For example, the following URL will return the promotion information in XML below:

http://localhost:8080/SvdemoRestful/resources/promo/jazz

the returned XML:

<Promotion>
  <Code>802</Code> 
  <Name>Jazz Night</Name> 
  <Description>Jazz lovers' do not miss this once in a lifetime opportunity.</Description> 
  <Venue>The Jazz Club</Venue> 
  <DateTime>2008-10-30 21:00:00</DateTime> 
  <Tags>Jazz</Tags> 
</Promotion>

Now, let's do this in Erlang.

Create a file called svdemoClient.erl. The following code will consume the first RESTful service:

-module(svdemoClient).
-export([get_keywords/1]).

%% Root URL of the demo RESTful service; the two URLs below are built on it.
-define(BASE_URL, "http://localhost:8080/SvdemoRestful/resources").
%% Prefix of the promotion search resource; the keywords are appended to it.
-define(PROMO_URL, ?BASE_URL ++ "/promo/").
%% Adjacent string literals are joined by the compiler, so no ++ is needed.
-define(KEYWORDS_URL, ?BASE_URL "/webPageKeywords"). % also works without ++

%% Builds the keyword service request URL for the given web page URL, which
%% is appended as the "url" query parameter.
keywords_url_for(Url) ->
    lists:append([?KEYWORDS_URL, "?url=", Url]).
%% Asks the keyword service to scrape the page at Url and returns the body
%% of the HTTP response. Crashes with badmatch if the request does not
%% return {ok, {Status, Headers, Body}}.
get_keywords(Url) ->
    {ok, Response} = http:request(keywords_url_for(Url)),
    {_Status, _Header, Body} = Response,
    Body.

In Erlang, function names and Atoms must start with lower-case letters; variable names must start with upper-case letters or underscore (meaning the variable is not used/read).

The -define() macro in Erlang is similar to #define in C/C++. In the above example, after defining BASE_URL, any occurrence of ?BASE_URL will be replaced with the string "http://localhost:8080/SvdemoRestful/resources".

The get_keywords() function returns the body of the HTTP response from requesting the given Url. The Body is either a comma-delimited string, or an empty collection. Executing the above code in Erlang:

127> c("d:/projects/svdemoErl/svdemoClient.erl").                            
{ok,svdemoClient}
128> svdemoClient:get_keywords("http://localhost/myWebPage.htm").     
"folk songs, pop music, chinese music ,battle, action, comedy "
129> svdemoClient:get_keywords("http://localhost/someOtherPage.htm").
[]
130>

To consume the second RESTful service, the search_promo() function is added.

%% Builds the promotion search URL for the given keywords, URL-encoding the
%% keywords with utils:url_encode/1.
promo_url_for(Keywords) ->
    Encoded = utils:url_encode(Keywords),
    ?PROMO_URL ++ Encoded.
%% Searches the promotion service for Keywords.
%% Returns not_found when the response body is "<Empty/>"; otherwise parses
%% the XML body and returns {Code, Name, Description, Venue, DateTime}, each
%% the text of the element of that name. Crashes with badmatch if the request
%% fails or an element does not have exactly one text node.
search_promo(Keywords) ->
    {ok, {_Status, _Header, Body}} = http:request(promo_url_for(Keywords)),
    case Body == "<Empty/>" of
        true ->
            not_found;
        false ->
            %%% The XML is in Body: parse it, then pull out each text node.
            {Doc, _Misc} = xmerl_scan:string(Body),
            TextAt = fun(XPath) ->
                [#xmlText{value = Text}] = xmerl_xpath:string(XPath, Doc),
                Text
            end,
            [Code, Name, Description, Venue, DateTime] =
                [TextAt(XPath) || XPath <- ["//Code/text()",
                                            "//Name/text()",
                                            "//Description/text()",
                                            "//Venue/text()",
                                            "//DateTime/text()"]],
            {Code, Name, Description, Venue, DateTime}
    end.

Erlang/OTP download comes with XML parser and XPath support in the xmerl application, which is not part of the Erlang standard library (stdlib). To use the XML functions, the header file must be included:

-include_lib("xmerl/include/xmerl.hrl").

Note that the keywords contain spaces, which must be URL-encoded before passing to Erlang's http:request() function. I stole the url_encode() function from YAWS and put it in utils.erl file.

To string the two service consumptions together:

%% Chains the two services: scrapes keywords from the page at Url and, when
%% any were found, searches promotions for them.
%% Returns not_found when no keywords were found, otherwise the result of
%% search_promo/1.
search_promo_from_url(Url) ->
    case get_keywords(Url) of
        [] -> not_found;
        Keywords -> search_promo(Keywords)
    end.

Calling the function in Erlang shell:

126> svdemoClient:search_promo_from_url("http://localhost/MyWebPage.htm")
. 
{"801",
 "Batman The Dark Knight",
 "\n\t\t\tMeet stars in Batman in person - Chritian Bale, Michael Caine.\n\t\t",

 "Star City",
 "2008-7-30 10:00:00"}

The final svdemoClient.erl file:

-module(svdemoClient).
-export([get_keywords/1, search_promo/1, search_promo_from_url/1]).
-include_lib("xmerl/include/xmerl.hrl").

%% Root URL of the demo RESTful service; the two URLs below are built on it.
-define(BASE_URL, "http://localhost:8080/SvdemoRestful/resources").
%% Prefix of the promotion search resource; the keywords are appended to it.
-define(PROMO_URL, ?BASE_URL ++ "/promo/").
%% Adjacent string literals are joined by the compiler, so no ++ is needed.
-define(KEYWORDS_URL, ?BASE_URL "/webPageKeywords"). % also works without ++

%% Builds the keyword service request URL for the given web page URL, which
%% is appended as the "url" query parameter.
keywords_url_for(Url) ->
    lists:append([?KEYWORDS_URL, "?url=", Url]).
%% Asks the keyword service to scrape the page at Url and returns the body
%% of the HTTP response. Crashes with badmatch if the request does not
%% return {ok, {Status, Headers, Body}}.
get_keywords(Url) ->
    {ok, Response} = http:request(keywords_url_for(Url)),
    {_Status, _Header, Body} = Response,
    Body.

%% Builds the promotion search URL for the given keywords, URL-encoding the
%% keywords with utils:url_encode/1.
promo_url_for(Keywords) ->
    Encoded = utils:url_encode(Keywords),
    ?PROMO_URL ++ Encoded.
%% Searches the promotion service for Keywords.
%% Returns not_found when the response body is "<Empty/>"; otherwise parses
%% the XML body and returns {Code, Name, Description, Venue, DateTime}, each
%% the text of the element of that name. Crashes with badmatch if the request
%% fails or an element does not have exactly one text node.
search_promo(Keywords) ->
    {ok, {_Status, _Header, Body}} = http:request(promo_url_for(Keywords)),
    case Body == "<Empty/>" of
        true ->
            not_found;
        false ->
            %%% The XML is in Body: parse it, then pull out each text node.
            {Doc, _Misc} = xmerl_scan:string(Body),
            TextAt = fun(XPath) ->
                [#xmlText{value = Text}] = xmerl_xpath:string(XPath, Doc),
                Text
            end,
            [Code, Name, Description, Venue, DateTime] =
                [TextAt(XPath) || XPath <- ["//Code/text()",
                                            "//Name/text()",
                                            "//Description/text()",
                                            "//Venue/text()",
                                            "//DateTime/text()"]],
            {Code, Name, Description, Venue, DateTime}
    end.
 
%% Chains the two services: scrapes keywords from the page at Url and, when
%% any were found, searches promotions for them.
%% Returns not_found when no keywords were found, otherwise the result of
%% search_promo/1.
search_promo_from_url(Url) ->
    case get_keywords(Url) of
        [] -> not_found;
        Keywords -> search_promo(Keywords)
    end.

The utils.erl file (copied from YAWS):

-module(utils).
-export([integer_to_hex/1, url_encode/1]).

%% Returns the upper-case hexadecimal string for the integer I.
%% Uses erlang:integer_to_list/2 and falls back to old_integer_to_hex/1 when
%% that call raises an error (for example on a runtime without the 2-arity
%% BIF). try/catch replaces the old-style `catch`, so only errors raised by
%% the conversion trigger the fallback.
integer_to_hex(I) ->
    try
        erlang:integer_to_list(I, 16)
    catch
        error:_ ->
            old_integer_to_hex(I)
    end.
 
 
%% Hand-rolled integer to upper-case hexadecimal conversion, used as the
%% fallback of integer_to_hex/1.
%% Values below 16 give a single digit; larger values are split into
%% "all but the last hex digit" and "last hex digit" and converted
%% recursively.
old_integer_to_hex(I) when I < 10 ->
    integer_to_list(I);
old_integer_to_hex(I) when I < 16 ->
    [I - 10 + $A];
old_integer_to_hex(I) when I >= 16 ->
    %% Integer division: trunc(I/16) goes through a float and loses the low
    %% digits of integers above 2^53.
    N = I div 16,
    old_integer_to_hex(N) ++ old_integer_to_hex(I rem 16).
 

%% Percent-encodes a string for use in a URL.
%% Letters, digits and the characters _ . - / : are copied through unchanged;
%% every other character becomes % followed by two hex digits (a single hex
%% digit is padded with a leading 0).
url_encode([]) ->
    [];
url_encode([Char | Rest]) ->
    case is_url_safe(Char) of
        true ->
            [Char | url_encode(Rest)];
        false ->
            url_escape(Char, Rest)
    end.

%% True for the characters url_encode/1 copies through unescaped.
is_url_safe(C) when C >= $a, $z >= C -> true;
is_url_safe(C) when C >= $A, $Z >= C -> true;
is_url_safe(C) when C >= $0, $9 >= C -> true;
is_url_safe(C) when C == $_; C == $.; C == $-; C == $/; C == $: -> true; % FIXME: more..
is_url_safe(_) -> false.

%% Emits the %XX escape for Char, followed by the encoding of Rest.
url_escape(Char, Rest) ->
    case integer_to_hex(Char) of
        [X, Y] ->
            [$%, X, Y | url_encode(Rest)];
        [X] ->
            [$%, $0, X | url_encode(Rest)]
    end.

Romen's eSpace