#
# Sample links dictionary file for Seth Golub's txt2html
# http://www.thehouse.org/~seth/txt2html/
#
# This dictionary contains some patterns for converting obvious URLs,
# ftp sites, hostnames, email addresses and the like to hrefs.
#
# Adapted shamelessly from the html.pl package by Oscar Nierstrasz in
# the Software Archive of the Software Composition Group
# http://iamwww.unibe.ch/~scg/Src/
#
# Email suggestions to seth@thehouse.org
# Please include "txt2html" in the subject of your message.
#

# URLs: <service>:<rest-of-url>
# 
# |snews:[\w\.]+|         -> $&
# |http:[\w/\.:+\-~\%#?]+[\w/]|  -> $&
# |shttp:[\w/\.:+\-~\%#?]+| -> $&
# |https:[\w/\.:+\-~\%#?]+| -> $&
# |file:[\w/\.:+\-]+|     -> $&
# |ftp:[\w/\.:+\-]+|      -> $&
# |wais:[\w/\.:+\-]+|     -> $&
# |gopher:[\w/\.:+\-]+|   -> $&
# |telnet:[\w/\@\.:+\-]+|   -> $&
# 
# 
# # catch some newsgroups to avoid confusion with sites:
# |([^\w\-/\.:\@>])(alt\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(bionet\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(bit\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(biz\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(clari\.[\w\.+\-]+[\w+\-]+)|  -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(comp\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(gnu\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(humanities\.[\w\.+\-]+[\w+\-]+)| 
#           -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(k12\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(misc\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(news\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(rec\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(soc\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(talk\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(us\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(ch\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
# |([^\w\-/\.:\@>])(de\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
# 
# # FTP locations (with directory):
# # anonymous@<site>:<path>
# |(anonymous\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
#   -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3
# 
# # ftp@<site>:<path>
# |(ftp\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
#   -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3
# 
# # Email address
# #|[a-zA-Z0-9_\+\-\.]+\@([a-zA-Z0-9][\w\.+\-]+\.[a-zA-Z]{2,})|
# #  -> mailto:$&
# 
# # <site>:<path>
# |([^\w\-/\.:\@>])([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
#   -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3
# 
# # NB: don't confuse an http server with a port number for
# # an FTP location!
# # internet number version: <internet-num>:<path>
# |([^\w\-/\.:\@])(\d{2,}\.\d{2,}\.\d+\.\d+):([\w\d+\-/\.]+)|
#   -h-> $1<A HREF="ftp://$2/$3">$2:$3</A>
# 
# # telnet <site> <port>
# |telnet ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})\s+(\d{2,4})|
#   -h-> telnet <A HREF="telnet://$1:$3/">$1 $3</A>
# 
# # ftp <site>
# |ftp ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})|
#   -h-> ftp <A HREF="ftp://$1/">$1</A>
# 
# # host with "ftp" in the machine name
# |(^|[^\w\d\-/\.:!])(([a-zA-Z][\w+\-]*)?ftp[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!])|
#   -h-> $1ftp <A HREF="ftp://$2/">$2</A>$4
# 
# # host with "www" in the machine name
# |(^|[^\w\d\-/\.:!])(([a-zA-Z][\w+\-]*)?www[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!])|
#   -h-> $1<A HREF="http://$2/">$2</A>$4
# 
# # <site> <port>
# |([a-zA-Z][\w+\-]+\.[\w+\-]+\.[a-zA-Z]{2,})\s+(\d{2,4})|
#   -h-> <A HREF="telnet://$1:$2/">$1 $2</A>
# 
# # just the site name: <site>
# #|([^\w\-/\.:\@>])([a-zA-Z][\w+\-]+(\.[\w+\-]+)+\.[a-zA-Z]{2,})|
# #  -h-> $1<A HREF="http://$2">$2</A>/
# 
# # just internet numbers with port:
# #|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+(\d{1,4})|
# #  -h-> $1<A HREF="telnet://$2:$3">$2 $3</A>
# 
# # just internet numbers:
# #|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})|
# #  -h-> $1<A HREF="telnet://$2">$2</A>
# 
# 
# # (see "relative path") as used by Tom Fine
# # /\(see \"([^\"]+)\"\)/  -> $1.html
# 
# # RFCs
# /RFC ?(\d+)/ -i-> http://www.cis.ohio-state.edu/rfc/rfc$1.txt
# 
# This would turn "f^H_o^H_o^H_" into "<U>foo</U>".  Gross, isn't it?
# Thanks to Mark O'Dell <emark@cns.caltech.edu> for fixing this. 
#
# /(.\\010_)+/ -he-> $tmp = $&;$tmp =~ s@\010_@@g;"<U>$tmp</U>"
# /(_\\010.)+/ -he-> $tmp = $&;$tmp =~ s@_\010@@g;"<U>$tmp</U>"
# /(.\^H_)+/ -he-> $tmp = $&;$tmp =~ s@\^H_@@g;"<U>$tmp</U>"
# /(_\^H.)+/ -he-> $tmp = $&;$tmp =~ s@_\^H@@g;"<U>$tmp</U>"


# Italics: mark _underlined stuff_ as <I>italicized stuff</I>.
# Flags (per the txt2html link-dictionary documentation -- verify against
# the txt2html version in use): h = the right-hand side is literal HTML
# rather than a URL to link to, i = match case-insensitively.
#
# General case: the phrase must begin and end with a letter or a
# parenthesis; in between, only letters, spaces, apostrophes and hyphens
# are accepted, so phrases containing digits or other punctuation are left
# alone.  The leading \b requires that the opening underscore does not
# directly follow a word character, so this rule does not fire inside
# identifiers such as foo_bar_baz.
/\b_([\(\)a-z][a-z '-]*[a-z\(\)])_/ -hi-> <I>$1</I>
# Need special case for _x_: the general pattern requires at least two
# characters between the underscores.  A lone hyphen or parenthesis is
# accepted here as well as a single letter.
# NOTE(review): unlike the general rule, this pattern has no leading \b,
# so it also matches inside words such as a_b_c -- confirm this is intended.
/_([-\(\)a-z])_/ -hi-> <I>$1</I>
# Need special case for _number_: the patterns above do not accept digits,
# so a purely numeric run such as _123_ is handled here.
/_(\d+)_/ -hi-> <I>$1</I>
# Entities
#/\[E\[([a-z]+)\]E\]/ -hi-> &$1;

# Boldface: mark *starred stuff* as <B>boldface stuff</B>.
# Flags (per the txt2html link-dictionary documentation -- verify against
# the txt2html version in use): h = the right-hand side is literal HTML,
# i = match case-insensitively.
#
# General case: the phrase must begin and end with a letter; in between,
# only letters, spaces, apostrophes and hyphens are accepted.  Unlike the
# _italic_ rule earlier in this file, parentheses are not accepted and
# there is no leading \b.
/\*([a-z][a-z '-]*[a-z])\*/ -hi-> <B>$1</B>
# Need special case for *x*: the general pattern requires at least two
# characters between the asterisks.
/\*([a-z])\*/ -hi-> <B>$1</B>
# Need special case for *number*: the patterns above do not accept digits,
# so a purely numeric run such as *123* is handled here.
/\*(\d+)\*/ -hi-> <B>$1</B>

# Mark !!bracketed stuff!! as plain HTML
# /!!(.*)!!/ -h-> $1
# Need special case for [[x]]
# /\[\[([a-z])\]\]/ -hi-> <TT>$1</TT>

# Provide an escape for when you really want an underscore without italicizing
# Does this work?   Nasty hack 20001224 MJD
#
# What the rule does: every literal three-character sequence ??! is
# replaced with an empty <font></font> element.
# NOTE(review): presumably the author writes ??! next to an underscore so
# that the italic patterns no longer match, and the marker is then turned
# into markup with no content.  Whether this works depends on the order in
# which txt2html applies the rules -- the question above is still open;
# TODO confirm with a test document.
/\?\?!/ -hi-> <font></font>


# Seth and his amazing conversion program    :-)
#
# Literal-string rules: the quoted text on the left is linked to the URL on
# the right.  Flags (per the txt2html link-dictionary documentation --
# verify against the txt2html version in use): i = match case-insensitively,
# o = presumably link only the first occurrence in a document.
# NOTE(review): the header of this file gives the txt2html home page as
# http://www.thehouse.org/~seth/txt2html/, which differs from the URL used
# for "txt2html" below -- confirm which one is current.

"Seth Golub"  -io-> http://www.thehouse.org/~seth/
"txt2html"    -io-> http://www.thehouse.org/txt2html/


# End of sample dictionary

