AppleScript

Convert OmniPage Pro 11 to HTML


This AppleScript is pretty basic; it was built to clean up the awfully verbose tables that OmniPage Pro 11 produces when you export as Netscape 3.2 HTML (the best of its bad export modes). Export your tables in that mode, open the result in BBEdit as the front window, and run this script to make the markup more reasonable. I built this script while transcribing an alluvion of tables in Soviet Union: Facts, Descriptions, & Statistics.

-- Tidy the verbose table markup that OmniPage Pro 11 writes when exporting as
-- "Netscape 3.2 HTML", and repair common OCR digit/letter mix-ups.
--
-- Works on the text of the front BBEdit text window. Every step is a
-- replace-all over the whole text, and the steps are ORDER-DEPENDENT: later
-- patterns are written against the output of earlier ones, so do not reorder.
--
-- Multi-line search/replace strings below are deliberately flush-left: any
-- indentation inside the quotes would become part of the string.

-- Replace every literal occurrence of findText with newText in the front
-- BBEdit text window. matchCase: true for a case-sensitive search.
on literalSwap(findText, newText, matchCase)
	tell application "BBEdit"
		replace findText using newText searching in text 1 of text window 1 options {search mode:literal, starting at top:false, wrap around:false, backwards:false, case sensitive:matchCase, match words:false, extend selection:false}
	end tell
end literalSwap

-- Same as literalSwap, but grepPattern is a BBEdit grep pattern and newText
-- may use \1, \2 ... back-references.
on grepSwap(grepPattern, newText, matchCase)
	tell application "BBEdit"
		replace grepPattern using newText searching in text 1 of text window 1 options {search mode:grep, starting at top:false, wrap around:false, backwards:false, case sensitive:matchCase, match words:false, extend selection:false}
	end tell
end grepSwap

tell application "BBEdit" to activate

-- 1. Strip presentational attributes and inline formatting tags.
literalSwap(" bgcolor=\"#FFFFFF\"", "", true)
literalSwap("<font size=\"2\">", "", false)
literalSwap("border=\"0px \">", "border=\"2\">", false)
literalSwap("cellspacing=\"0\"", "cellspacing=\"2%\"", false)
grepSwap(" width=\"\\d\\d%\"", "", true)
literalSwap("align=\"left\" ", "", true)
literalSwap("align=\"right\" ", "", true)
literalSwap(" valign=\"top\"", "", true)
literalSwap(" valign=\"middle\"", "", true)
-- NOTE(review): the published listing showed a bare " " here. Deleting every
-- space would keep the space-containing patterns further down from ever
-- matching, so the "&nbsp;" entity is assumed to be what was intended
-- (presumably decoded when the script was posted as HTML) -- confirm against
-- real OmniPage output.
literalSwap("&nbsp;", "", true)
literalSwap("</font>", "", false)
literalSwap("<br>", "", false)
literalSwap("<A name=\"65537\"></A>", "", false)

-- An apostrophe directly before a digit is rewritten as the digit 1.
grepSwap("(')(\\d)", "1\\2", false)

-- 2. Wrap each table in <center> and give it the "data" class.
literalSwap("<TABLE", "<center>
<table class=\"data\"", false)
-- Cells whose content starts with a digit get the "center" class.
grepSwap("(<td>)(\\d)", "<td class=\"center\">\\2", false)
literalSwap("<table class=\"data\" class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" border=\"2\">", "<table class=\"data\" class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" border=\"2\" width=\"50%\">", false)
literalSwap("</td></tr></table>", "</td>
</tr>
</table>
</center>
", false)

-- 3. Drop the document header OmniPage emits (as it looks after the steps above).
literalSwap("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">
<HTML>
<HEAD>
<META HTTP-EQUIV=\"Generator\" CONTENT=\"OmniPage 11 - www.scansoft.com\">
<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=UTF-8\">
<TITLE>Recognized HTML document</TITLE></HEAD>
<BODY>
<BASEFONT size=\"2\">
<DIV align=\"left\">
<center>
<table class=\"data\">
<TD>", "", false)

-- 4. Collapse a layout table wrapped around the real table (opening half).
literalSwap("<center>
<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" width=\"100%\" border=\"2\">
<tr>
<td width=\"1%\"></td>
<td>
<center>
<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" border=\"2\">", "<center>
<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" width=\"70%\" border=\"2\">", false)

-- 5. OCR repair: letters i/o that sit next to digits or commas become 1/0.
-- NOTE(review): these are case-insensitive literal replacements, so they also
-- hit ordinary prose (e.g. "i," at the end of a word); the script presumes the
-- document is essentially numeric tables.
literalSwap("i,", "1,", false)
literalSwap(",o", ",0", false)
literalSwap("o,", "0,", false)
literalSwap("0o", "00", false)
-- Run twice on purpose: one pass turns "oo0" into "o00", the second finishes it.
literalSwap("o0", "00", false)
literalSwap("o0", "00", false)
literalSwap("ioo", "100", false)

-- 6. OCR repair in the other direction: a zero inside a known word becomes "o".
literalSwap("T0tal", "Total", false)
literalSwap("Pr0fit", "Profit", false)
literalSwap("ti0n", "tion", false)
literalSwap("g0vt", "govt", false)
literalSwap("Inc0me", "Income", false)
literalSwap("0o", "00", false)

-- 7. Collapse the closing half of the wrapper layout table.
literalSwap("</tr>
</table>
</center>
</td>
</tr>
</table>
</center>", "</tr>
</table>
</center>
", false)

-- 8. Drop the document footer.
literalSwap("</td></table>
</div>
</body></html>
", "", false)

-- 9. Re-join numbers that were split across a line break.
grepSwap("(\\d,)\\r(\\d)", "\\1\\2", false)
grepSwap("(\\d)\\r(\\d)", "\\1\\2", false)
grepSwap("(\\d)\\r(,\\d)", "\\1\\2", false)
grepSwap("(\\d-)\\r(\\d)", "\\1\\2", false)

-- 10. Full-width tables: narrow to 80% and mark the first cell as a header cell.
literalSwap("<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" width=\"100%\" border=\"2\">
<tr>
<td>", "<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" width=\"80%\" border=\"2\">
<tr>
<td class=\"head-data\">", false)

-- 11. Second round of OCR and line-join repairs, plus remaining inline tags.
literalSwap(".o", ".0", false)
grepSwap("(\\d\\.)\\r(\\d)", "\\1\\2", false)
grepSwap("(\\d)\\r(\\d)", "\\1\\2", false)
grepSwap("0f", "of", false)
grepSwap("t0n", "ton", false)
literalSwap("<font size=\"1\">", "", false)
literalSwap("<b>", "", false)
literalSwap("</b>", "", false)
grepSwap("i(\\d)", "1\\1", false)
-- Repeated from step 2: cells that only now start with a digit (after the
-- i -> 1 repair just above) also get the "center" class.
grepSwap("<td>(\\d)", "<td class=\"center\">\\1", false)
literalSwap("<I>", "", false)
literalSwap("</I>", "", false)

-- 12. Collapse a doubly-nested layout wrapper down to the innermost table.
literalSwap("<center>
<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" border=\"2\">
<tr>
<td width=\"100%\">
<center>
<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" width=\"100%\" border=\"2\">
<tr>
<td width=\"3%\"></td>
<td>
<center>
<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" border=\"2\">", "<center>
<table class=\"data\" cellspacing=\"2%\" cellpadding=\"0\" border=\"2\">", false)

-- 13. Any remaining "oo" pair is treated as a misread "00".
-- NOTE(review): this also rewrites "oo" inside ordinary words; confirm that is
-- acceptable for the documents this is run on.
literalSwap("oo", "00", false)