#!/bin/bash
# tohtml.sh [v. 0.2, reldate: 06/26/08, still buggy]

# Convert a text file to HTML format.
# Author: Mendel Cooper
# License: GPL3
# Usage: bash tohtml.sh < textfile > htmlfile
#        (bash is required: the script uses [[ ... =~ ... ]] and here-strings.)
# Script can easily be modified to accept source and target filenames.

#    Assumptions:
# 1) Paragraphs in (target) text file are separated by a blank line.
# 2) Jpeg images (*.jpg) are located in "images" subdirectory.
#    In the target file, the image names are enclosed in square brackets,
#    for example, [image01.jpg], one image reference per line.
# 3) Emphasized (italic) phrases begin with a space+underscore
#+   or the first character on the line is an underscore,
#+   and end with an underscore followed by whitespace, one of . , ; : ! ?
#+   or end-of-line.

# Settings
FNTSIZE=2        # Small-medium font size
IMGDIR="images"  # Image directory

# Headers
HDR01='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
HDR02='<!-- Converted to HTML by ***tohtml.sh*** script -->'
HDR03='<!-- script author: M. Leo Cooper <thegrendel.abs@gmail.com> -->'
HDR10='<html>'
HDR11='<head>'
HDR11a='</head>'
# shellcheck disable=SC2034  # title tags are defined but not emitted by write_headers
HDR12a='<title>'
# shellcheck disable=SC2034
HDR12b='</title>'
HDR121='<META NAME="GENERATOR" CONTENT="tohtml.sh script">'
HDR13='<body bgcolor="#dddddd">'   # Change background color to suit.
HDR14a='<font size='
HDR14b='>'

# Footers
FTR10='</body>'
FTR11='</html>'

# Tags
BOLD="<b>"
CENTER="<center>"
END_CENTER="</center>"
LF="<br>"

#######################################
# Write the HTML preamble: doctype, generator comments, <head>, <body>,
# the opening <font size=N> tag and the opening <b> tag.
# Globals:   HDR01 HDR02 HDR03 HDR10 HDR11 HDR11a HDR121 HDR13
#            HDR14a HDR14b FNTSIZE BOLD (all read)
# Outputs:   header lines on stdout
#######################################
write_headers() {
  # Empty arguments produce the blank separator lines.
  printf '%s\n' \
    "$HDR01" "" \
    "$HDR02" "$HDR03" "" "" \
    "$HDR10" "$HDR11" "$HDR121" "$HDR11a" "$HDR13" "" \
    "${HDR14a}${FNTSIZE}${HDR14b}" "" \
    "$BOLD"   # Everything in bold (more easily readable).
}

#######################################
# Convert _underscored phrases_ in one line of text to <i>...</i>.
# Only properly paired underscores are converted: the opening underscore
# must be at the start of the line or follow whitespace, the closing one
# must be followed by whitespace, one of . , ; : ! ? or end-of-line.
# Underscores embedded within a word (snake_case) and unpaired underscores
# are left untouched. A phrase may not itself contain an underscore.
# Arguments: $1 - a single line of text
# Outputs:   the converted line on stdout
#######################################
italicize_line() {
  # The substitution is applied in a loop (":again" ... "t again") rather
  # than with the "g" flag, so that adjacent phrases sharing one separating
  # space ("_a_ _b_") are both converted. Each pass removes two underscores,
  # so the loop always terminates.
  sed -E \
    -e ':again' \
    -e 's/(^|[[:space:]])_([^_[:space:]]([^_]*[^_[:space:]])?)_([[:space:].,;:!?]|$)/\1<i>\2<\/i>\4/' \
    -e 't again' <<<"$1"
}

#######################################
# Read text from stdin line by line and write the HTML body to stdout.
#   - blank line          -> two <br> tags (paragraph break)
#   - [name.jpg] on a line -> centered <img> tag pointing into $IMGDIR
#   - any other line      -> the text, with _phrases_ turned into italics
# Globals:   LF CENTER END_CENTER IMGDIR (read)
# Outputs:   converted text on stdout
#######################################
process_text() {
  local line
  # Kept in a variable: the right-hand side of =~ must be unquoted to be
  # treated as a regex, and the bracket expressions are awkward inline.
  local -r image_re='^\[([^][]+\.jpg)\]$'

  # -r keeps backslashes literal. The default IFS still trims leading and
  # trailing whitespace, so whitespace-only lines count as blank.
  # The "|| [[ -n ... ]]" clause processes a final line lacking a newline.
  while read -r line || [[ -n "$line" ]]; do
    if [[ -z "$line" ]]; then          # Blank line?
      printf '\n%s\n%s\n\n' "$LF" "$LF"  # Then new paragraph: two <br> tags.
      continue
    fi

    if [[ "$line" =~ $image_re ]]; then  # Is a graphic?
      # Use the captured file name (brackets stripped) and center the image.
      # Image lines skip the underscore test, so file names containing
      # underscores are not mangled.
      printf '\n%s\n\n' \
        "$CENTER <img src=\"$IMGDIR/${BASH_REMATCH[1]}\"> $END_CENTER "
      continue
    fi

    if [[ "$line" == *_* ]]; then      # Only fork sed when there is an underscore.
      line=$(italicize_line "$line")
    fi

    # printf instead of echo: safe for lines such as "-n" or "-e".
    printf '\n%s\n\n' "$line"
  done
}

#######################################
# Write the termination tags.
# Globals:   FTR10 FTR11 (read)
# Outputs:   footer lines on stdout
#######################################
write_footers() {
  printf '%s\n' "$FTR10" "$FTR11"
}

#######################################
# Filter stdin (plain text) to stdout (HTML).
#######################################
main() {
  write_headers
  process_text
  write_footers
}

# The script's exit status is that of main, i.e. of its last command.
main "$@"

# Exercises:
# ---------
# 1) Fixup: Check for closing underscore before a comma or period.
#    (Handled by the closing-delimiter set in italicize_line.)
# 2) Add a test for the presence of a closing underscore
#+   in phrases to be italicized.
#    (Handled: italicize_line converts only paired underscores.)