[Prev][Next][Index][Thread]

Re: Critique a first prog?



I'm posting this on behalf of Neel Krishnaswami, who (as you can see 
below) could only send it to me as email. Thanks to Neel and everyone 
who's responded, you've got some great suggestions. After I've collected 
everyone's responses, I'll post an edited version of the original 
program for 'before and after' comparison.


Bryn


You wrote:
 > I'm new to Dylan, and I don't know anyone personally who knows Dylan,
 > so I'm posting my first Dylan program here for everyone to pick
 > apart.

Welcome! I've attached a commented-up copy of your code. My home
PC (with Dylan on it) is dead at the moment, so I couldn't test my
changes, so caveat emptor. But it should help get the feel across.
(It's also why this is in email rather than a post -- feel free
to post this message to Usenet!)

 > The purpose of the program is to take all the html files in a
 > directory and concatenate them, after first stripping out unwanted
 > lines. Aside from the general lack of error-handling, what could be
 > improved about this code? Does anything about this approach suggest
 > 'non-Dylanesque' thinking? I'm especially interested in the read-lines
 > functions - is there already equivalent functionality available
 > somewhere?

Weirdly, no. It's just a few lines to write one, though.

 > Is the use of blocks and handlers the way you would implement these
 > functions?

This is the only major non-Dylanish thing about your code.

 > Also, is there a regular expressions library available?

There is a semi-working one in GD's common-dylan library. I don't know
if anyone uses it, though. A number of people (including me) have
written their own, and they are in various stages of incompleteness.

-*-*-*-

Module:      docbuilder
Synopsis:    Convert chunked up docs to one printable page.

// If you try to read a large file, then you can suck up a lot
// memory unexpectedly. It's better to iterate over the
// lines one at a time.
//
// If you still want to write something like this, some better solutions
// are:
//
// define method read-lines (stream)
//   let v = make(<stretchy-vector>);
//   block()
//     while(#t)
// 	 v := add!(v, stream.read-line)
//     end while;
//   exception (<end-of-stream-error>)
//     v
//   end block;
// end method read-lines;
//
// define method read-lines (stream)
//   let v = make(<stretchy-vector>);
//   until (stream.stream-at-end?)
//     v := add!(v, stream.read-line)
//   end until;
//   //
//   v
// end method read-lines;
//
// The second is better, but I also wrote the first so you can see
// how to use the exception clause of the block statement.

define method read-lines(stream :: <stream>)
let v = make(<stretchy-vector>);
block(exit)
let handler <end-of-stream-error>
= method(condition, next)
exit();
end;
while (#t)
add!(v, read-line(stream));
end;
end;
v;
end;

// The body of this method is probably better written with
// function composition
//
// define constant $bad = vector("SRC=next.gif", "<HTML>", "<BODY",
// 
			 "<!DOCTYPE", "HEAD>", "<TITLE>",
"<ADDRESS>",
// 
			 "</HTML>", "</BODY", "<LINK", "<META",
// 
			 "<DIV", "</DIV");
//
// define function good-line?(line)
//   ~any?(curry(subsequence-position, line), $bad)
// end function good-line?;
//
// because this matches the English description of the function:
// "False if any of the substrings of the line are one of the bad strings"
// and makes your intent much clearer. If you find the call to curry
// unclear, name the new function:
//
// define function good-line?(line)
//   let substring-of-line? = curry(subsequence-position, line);
//   ~any?(substring-of-line?, $bad)
// end function good-line?;
//
// Also, check common-dylan for the substring-search function. It
// may be a lot faster than a generic subsequence-position because
// it can do a Boyer-Moore search or somesuch. It may be invoked
// automatically though, so it might not help.

define function good-line?(line)
let bad = vector("SRC=next.gif", "<HTML>", "<BODY",
"<!DOCTYPE", "HEAD>", "<TITLE>", "<ADDRESS>", "</HTML>",
"</BODY", "<LINK", "<META", "<DIV", "</DIV");
block(break)
for (thing in bad)
if (subsequence-position(line, thing))
break(#f);
end;
end;
#t;
end;
end;
// Use the with-open-file macro. This will prevent you from failing to
// close fis because of an exception thrown somewhere in read-lines.
//
// Eg (also iterating over each line instead of building a list):
//
//
// define function process-file(filename, writer)
//   with-open-file(fis = make(<file-stream>, locator: filename))
//     until (fis.stream-at-end?)
// 	 let line = fis.read-line;
// 	 if (line.good-line?) writer(line) end if;
//     end until;
//   end with-open-file;
// end function process-file;
//
// Note that I used an explicit until loop rather than a do(). This
// is my own style; I tend to use higher-order functions with purely
// functional code and explicit loops when I manipulate state. This
// is very common but not universal -- choose whichever is prevalent
// in the code you are working with.

define function process-file(filename, writer)
let fis = make(<file-stream>, locator: filename);
let lines = read-lines(fis);
close(fis);
lines := choose(good-line?, lines);
do(writer, lines);
end;

// Make the find-files and html-file? local to docbuilder-top-level, so
// that their definition is close to their use and that they don't
// clutter up the global namespace.

define function find-files(pathname :: <string>, criterion? :: <function>)
let files = make(<stretchy-vector>);
do-directory(compose(curry(add!, files), second, list), pathname);
choose(criterion?, files);
end;

// What about ".html" line endings?

define function html-file?(file)
(subsequence-position(file, ".htm") = (file.size - 4));
end;

// Use the with-open-file macro in this function, again. Also, there's
// no harm in passing the write-stream directly to process-file, and
// letting it call write() itself. This is because you can use the
// <sequence-stream> class to build a collection, if you need it.
//
// This is again a style thing; I just don't like higher-order
// functions that mutate things. If you do use them, please add
// "!" extensions so that the side-effect is clear. Eg, writer!
// instead of writer. (Unfortunately, the streams library doesn't
// do this -- a clear Common Lispism. :)

define function docbuilder-top-level ()
let arguments = application-arguments();
if (arguments.size == 0)
format-out("Usage: %s pathname\n", application-name());
else
format-out("Checking path: %s\n", arguments[0]);
let files = find-files(arguments[0], html-file?);
let fos = make(<file-stream>,
locator: concatenate(arguments[0],"Build.htm"),
direction: #"output",
if-does-not-exist: #"create" );
let write-fos = curry(write-line, fos);
for (file in files)
process-file(concatenate(arguments[0], file), write-fos);
end;
close(fos);
end
end function docbuilder-top-level;

docbuilder-top-level();

--
Neel Krishnaswami
neelk@cswcasa.com

Bryn Keller wrote:

> Hello,
>     I'm new to Dylan, and I don't know anyone personally who knows
> Dylan, so I'm posting my first Dylan program here for everyone to pick
> apart. The purpose of the program is to take all the html files in a
> directory and concatenate them, after first stripping out unwanted
> lines. Aside from the general lack of error-handling, what could be
> improved about this code? Does anything about this approach suggest
> 'non-Dylanesque' thinking? I'm especially interested in the read-lines
> functions - is there already equivalent functionality available
> somewhere? Is the use of blocks and handlers the way you would implement
> these functions? Also, is there a regular expressions library available?
> 
> Thanks,
> 
> Bryn
> 
> 
> 
> Module:      docbuilder
> Synopsis:    Convert chunked up docs to one printable page.
> 
> 
> define method read-lines(stream :: <stream>)
>   let v = make(<stretchy-vector>);
>   block(exit)
>     let handler <end-of-stream-error>
>       = method(condition, next)
>      exit();
>  end;
>     while (#t)
>       add!(v, read-line(stream));
>     end;
>   end;
>   v;
> end;
> 
> 
> define function good-line?(line)
>   let bad = vector("SRC=next.gif", "<HTML>", "<BODY",
>      "<!DOCTYPE", "HEAD>", "<TITLE>", "<ADDRESS>", "</HTML>",
>      "</BODY", "<LINK", "<META", "<DIV", "</DIV");
>   block(break)
>     for (thing in bad)
>       if (subsequence-position(line, thing))
>         break(#f);
>       end;
>     end;
>     #t;
>   end;
> end;
> 
> define function process-file(filename, writer)
>   let fis = make(<file-stream>, locator: filename);
>   let lines = read-lines(fis);
>   close(fis);
>   lines := choose(good-line?, lines);
>   do(writer, lines);
> end;
> 
> define function find-files(pathname :: <string>, criterion? ::
> <function>)
>   let files = make(<stretchy-vector>);
>   do-directory(compose(curry(add!, files), second, list), pathname);
>   choose(criterion?, files);
> end;
> 
> define function html-file?(file)
>   (subsequence-position(file, ".htm") = (file.size - 4));
> end;
> 
> define function docbuilder-top-level ()
>   let arguments = application-arguments();
>   if (arguments.size == 0)
>     format-out("Usage: %s pathname\n", application-name());
>   else
>     format-out("Checking path: %s\n", arguments[0]);
>     let files = find-files(arguments[0], html-file?);
>     let fos = make(<file-stream>, locator: concatenate(arguments[0],
> "Build.htm"),
>      direction: #"output", if-does-not-exist: #"create" );
>     let write-fos = curry(write-line, fos);
>     for (file in files)
>       process-file(concatenate(arguments[0], file), write-fos);
>     end;
>     close(fos);
>   end
> end function docbuilder-top-level;
> 
> docbuilder-top-level();





References: