Rtfreadr.c
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "rtftype.h"
#include "rtfdecl.h"
int cGroup;
bool fSkipDestIfUnk;
long cbBin;
long lParam;
RDS rds;
RIS ris;
CHP chp;
PAP pap;
SEP sep;
DOP dop;
SAVE *psave;
FILE *fpIn;
//
// %%Function: main
//
// Main loop. Initialize and parse RTF.
//
main(int argc, char *argv[])
{
FILE *fp;
int ec;
fp = fpIn = fopen("test.rtf", "r");
if (!fp)
{
printf ("Can't open test file!\n");
return 1;
}
if ((ec = ecRtfParse(fp)) != ecOK)
printf("error %d parsing rtf\n", ec);
else
printf("Parsed RTF file OK\n");
fclose(fp);
return 0;
}
//
// %%Function: ecRtfParse
//
// Step 1:
// Isolate RTF keywords and send them to ecParseRtfKeyword;
// Push and pop state at the start and end of RTF groups;
// Send text to ecParseChar for further processing.
//
int
ecRtfParse(FILE *fp)
{
int ch;
int ec;
int cNibble = 2;
int b = 0;
while ((ch = getc(fp)) != EOF)
{
if (cGroup < 0)
return ecStackUnderflow;
if (ris == risBin) // if we're parsing binary data, handle it directly
{
if ((ec = ecParseChar(ch)) != ecOK)
return ec;
}
else
{
switch (ch)
{
case '{':
if ((ec = ecPushRtfState()) != ecOK)
return ec;
break;
case '}':
if ((ec = ecPopRtfState()) != ecOK)
return ec;
break;
case '\\':
if ((ec = ecParseRtfKeyword(fp)) != ecOK)
return ec;
break;
case 0x0d:
case 0x0a: // cr and lf are noise characters...
break;
default:
if (ris == risNorm)
{
if ((ec = ecParseChar(ch)) != ecOK)
return ec;
}
else
{ // parsing hex data
if (ris != risHex)
return ecAssertion;
b = b << 4;
if (isdigit(ch))
b += (char) ch - '0';
else
{
if (islower(ch))
{
if (ch < 'a' || ch > 'f')
return ecInvalidHex;
b += (char) ch - 'a';
}
else
{
if (ch < 'A' || ch > 'F')
return ecInvalidHex;
b += (char) ch - 'A';
}
}
cNibble--;
if (!cNibble)
{
if ((ec = ecParseChar(b)) != ecOK)
return ec;
cNibble = 2;
b = 0;
ris = risNorm;
}
} // end else (ris != risNorm)
break;
} // switch
} // else (ris != risBin)
} // while
if (cGroup < 0)
return ecStackUnderflow;
if (cGroup > 0)
return ecUnmatchedBrace;
return ecOK;
}
//
// %%Function: ecPushRtfState
//
// Save relevant info on a linked list of SAVE structures.
//
int
ecPushRtfState(void)
{
SAVE *psaveNew = malloc(sizeof(SAVE));
if (!psaveNew)
return ecStackOverflow;
psaveNew -> pNext = psave;
psaveNew -> chp = chp;
psaveNew -> pap = pap;
psaveNew -> sep = sep;
psaveNew -> dop = dop;
psaveNew -> rds = rds;
psaveNew -> ris = ris;
ris = risNorm;
psave = psaveNew;
cGroup++;
return ecOK;
}
//
// %%Function: ecPopRtfState
//
// If we're ending a destination (that is, the destination is changing),
// call ecEndGroupAction.
// Always restore relevant info from the top of the SAVE list.
//
int
ecPopRtfState(void)
{
SAVE *psaveOld;
int ec;
if (!psave)
return ecStackUnderflow;
if (rds != psave->rds)
{
if ((ec = ecEndGroupAction(rds)) != ecOK)
return ec;
}
chp = psave->chp;
pap = psave->pap;
sep = psave->sep;
dop = psave->dop;
rds = psave->rds;
ris = psave->ris;
psaveOld = psave;
psave = psave->pNext;
cGroup--;
free(psaveOld);
return ecOK;
}
//
// %%Function: ecParseRtfKeyword
//
// Step 2:
// get a control word (and its associated value) and
// call ecTranslateKeyword to dispatch the control.
//
int
ecParseRtfKeyword(FILE *fp)
{
int ch;
char fParam = fFalse;
char fNeg = fFalse;
int param = 0;
char *pch;
char szKeyword[30];
char szParameter[20];
szKeyword[0] = '\0';
szParameter[0] = '\0';
if ((ch = getc(fp)) == EOF)
return ecEndOfFile;
if (!isalpha(ch)) // a control symbol; no delimiter.
{
szKeyword[0] = (char) ch;
szKeyword[1] = '\0';
return ecTranslateKeyword(szKeyword, 0, fParam);
}
for (pch = szKeyword; isalpha(ch); ch = getc(fp))
*pch++ = (char) ch;
*pch = '\0';
if (ch == '-')
{
fNeg = fTrue;
if ((ch = getc(fp)) == EOF)
return ecEndOfFile;
}
if (isdigit(ch))
{
fParam = fTrue; // a digit after the control means we have a parameter
for (pch = szParameter; isdigit(ch); ch = getc(fp))
*pch++ = (char) ch;
*pch = '\0';
param = atoi(szParameter);
if (fNeg)
param = -param;
lParam = atol(szParameter);
if (fNeg)
param = -param;
}
if (ch != ' ')
ungetc(ch, fp);
return ecTranslateKeyword(szKeyword, param, fParam);
}
//
// %%Function: ecParseChar
//
// Route the character to the appropriate destination stream.
//
int
ecParseChar(int ch)
{
if (ris == risBin && --cbBin <= 0)
ris = risNorm;
switch (rds)
{
case rdsSkip:
// Toss this character.
return ecOK;
case rdsNorm:
// Output a character. Properties are valid at this point.
return ecPrintChar(ch);
default:
// handle other destinations....
return ecOK;
}
}
//
// %%Function: ecPrintChar
//
// Send a character to the output file.
//
int
ecPrintChar(int ch)
{
// unfortunately, we don't do a whole lot here as far as layout goes...
putchar(ch);
return ecOK;
}RTFACTN.C
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include <ctype.h>
#include "rtftype.h"
#include "rtfdecl.h"
// RTF parser tables
// Property descriptions
PROP rgprop [ipropMax] = {
actnByte, propChp, offsetof(CHP, fBold), // ipropBold
actnByte, propChp, offsetof(CHP, fItalic), // ipropItalic
actnByte, propChp, offsetof(CHP, fUnderline), // ipropUnderline
actnWord, propPap, offsetof(PAP, xaLeft), // ipropLeftInd
actnWord, propPap, offsetof(PAP, xaRight), // ipropRightInd
actnWord, propPap, offsetof(PAP, xaFirst), // ipropFirstInd
actnWord, propSep, offsetof(SEP, cCols), // ipropCols
actnWord, propSep, offsetof(SEP, xaPgn), // ipropPgnX
actnWord, propSep, offsetof(SEP, yaPgn), // ipropPgnY
actnWord, propDop, offsetof(DOP, xaPage), // ipropXaPage
actnWord, propDop, offsetof(DOP, yaPage), // ipropYaPage
actnWord, propDop, offsetof(DOP, xaLeft), // ipropXaLeft
actnWord, propDop, offsetof(DOP, xaRight), // ipropXaRight
actnWord, propDop, offsetof(DOP, yaTop), // ipropYaTop
actnWord, propDop, offsetof(DOP, yaBottom), // ipropYaBottom
actnWord, propDop, offsetof(DOP, pgnStart), // ipropPgnStart
actnByte, propSep, offsetof(SEP, sbk), // ipropSbk
actnByte, propSep, offsetof(SEP, pgnFormat), // ipropPgnFormat
actnByte, propDop, offsetof(DOP, fFacingp), // ipropFacingp
actnByte, propDop, offsetof(DOP, fLandscape), // ipropLandscape
actnByte, propPap, offsetof(PAP, just), // ipropJust
actnSpec, propPap, 0, // ipropPard
actnSpec, propChp, 0, // ipropPlain
actnSpec, propSep, 0, // ipropSectd
};
// Keyword descriptions
SYM rgsymRtf[] = {
// keyword dflt fPassDflt kwd idx
"b", 1, fFalse, kwdProp, ipropBold,
"u", 1, fFalse, kwdProp, ipropUnderline,
"i", 1, fFalse, kwdProp, ipropItalic,
"li", 0, fFalse, kwdProp, ipropLeftInd,
"ri", 0, fFalse, kwdProp, ipropRightInd,
"fi", 0, fFalse, kwdProp, ipropFirstInd,
"cols", 1, fFalse, kwdProp, ipropCols,
"sbknone", sbkNon, fTrue, kwdProp, ipropSbk,
"sbkcol", sbkCol, fTrue, kwdProp, ipropSbk,
"sbkeven", sbkEvn, fTrue, kwdProp, ipropSbk,
"sbkodd", sbkOdd, fTrue, kwdProp, ipropSbk,
"sbkpage", sbkPg, fTrue, kwdProp, ipropSbk,
"pgnx", 0, fFalse, kwdProp, ipropPgnX,
"pgny", 0, fFalse, kwdProp, ipropPgnY,
"pgndec", pgDec, fTrue, kwdProp, ipropPgnFormat,
"pgnucrm", pgURom, fTrue, kwdProp, ipropPgnFormat,
"pgnlcrm", pgLRom, fTrue, kwdProp, ipropPgnFormat,
"pgnucltr", pgULtr, fTrue, kwdProp, ipropPgnFormat,
"pgnlcltr", pgLLtr, fTrue, kwdProp, ipropPgnFormat,
"qc", justC, fTrue, kwdProp, ipropJust,
"ql", justL, fTrue, kwdProp, ipropJust,
"qr", justR, fTrue, kwdProp, ipropJust,
"qj", justF, fTrue, kwdProp, ipropJust,
"paperw", 12240, fFalse, kwdProp, ipropXaPage,
"paperh", 15480, fFalse, kwdProp, ipropYaPage,
"margl", 1800, fFalse, kwdProp, ipropXaLeft,
"margr", 1800, fFalse, kwdProp, ipropXaRight,
"margt", 1440, fFalse, kwdProp, ipropYaTop,
"margb", 1440, fFalse, kwdProp, ipropYaBottom,
"pgnstart", 1, fTrue, kwdProp, ipropPgnStart,
"facingp", 1, fTrue, kwdProp, ipropFacingp,
"landscape",1, fTrue, kwdProp, ipropLandscape,
"par", 0, fFalse, kwdChar, 0x0a,
"\0x0a", 0, fFalse, kwdChar, 0x0a,
"\0x0d", 0, fFalse, kwdChar, 0x0a,
"tab", 0, fFalse, kwdChar, 0x09,
"ldblquote",0, fFalse, kwdChar, '"',
"rdblquote",0, fFalse, kwdChar, '"',
"bin", 0, fFalse, kwdSpec, ipfnBin,
"*", 0, fFalse, kwdSpec, ipfnSkipDest,
"'", 0, fFalse, kwdSpec, ipfnHex,
"author", 0, fFalse, kwdDest, idestSkip,
"buptim", 0, fFalse, kwdDest, idestSkip,
"colortbl", 0, fFalse, kwdDest, idestSkip,
"comment", 0, fFalse, kwdDest, idestSkip,
"creatim", 0, fFalse, kwdDest, idestSkip,
"doccomm", 0, fFalse, kwdDest, idestSkip,
"fonttbl", 0, fFalse, kwdDest, idestSkip,
"footer", 0, fFalse, kwdDest, idestSkip,
"footerf", 0, fFalse, kwdDest, idestSkip,
"footerl", 0, fFalse, kwdDest, idestSkip,
"footerr", 0, fFalse, kwdDest, idestSkip,
"footnote", 0, fFalse, kwdDest, idestSkip,
"ftncn", 0, fFalse, kwdDest, idestSkip,
"ftnsep", 0, fFalse, kwdDest, idestSkip,
"ftnsepc", 0, fFalse, kwdDest, idestSkip,
"header", 0, fFalse, kwdDest, idestSkip,
"headerf", 0, fFalse, kwdDest, idestSkip,
"headerl", 0, fFalse, kwdDest, idestSkip,
"headerr", 0, fFalse, kwdDest, idestSkip,
"info", 0, fFalse, kwdDest, idestSkip,
"keywords", 0, fFalse, kwdDest, idestSkip,
"operator", 0, fFalse, kwdDest, idestSkip,
"pict", 0, fFalse, kwdDest, idestSkip,
"printim", 0, fFalse, kwdDest, idestSkip,
"private1", 0, fFalse, kwdDest, idestSkip,
"revtim", 0, fFalse, kwdDest, idestSkip,
"rxe", 0, fFalse, kwdDest, idestSkip,
"stylesheet", 0, fFalse, kwdDest, idestSkip,
"subject", 0, fFalse, kwdDest, idestSkip,
"tc", 0, fFalse, kwdDest, idestSkip,
"title", 0, fFalse, kwdDest, idestSkip,
"txe", 0, fFalse, kwdDest, idestSkip,
"xe", 0, fFalse, kwdDest, idestSkip,
"{", 0, fFalse, kwdChar, '{',
"}", 0, fFalse, kwdChar, '}',
"\\", 0, fFalse, kwdChar, '\\'
};
int isymMax = sizeof(rgsymRtf) / sizeof(SYM);
//
// %%Function: ecApplyPropChange
//
// Set the property identified by _iprop_ to the value _val_.
//
//
int
ecApplyPropChange(IPROP iprop, int val)
{
char *pb;
if (rds == rdsSkip) // If we're skipping text,
return ecOK; // don't do anything.
switch (rgprop[iprop].prop)
{
case propDop:
pb = (char *)&dop;
break;
case propSep:
pb = (char *)&sep;
break;
case propPap:
pb = (char *)&pap;
break;
case propChp:
pb = (char *)&chp;
break;
default:
if (rgprop[iprop].actn != actnSpec)
return ecBadTable;
break;
}
switch (rgprop[iprop].actn)
{
case actnByte:
pb[rgprop[iprop].offset] = (unsigned char) val;
break;
case actnWord:
(*(int *) (pb+rgprop[iprop].offset)) = val;
break;
case actnSpec:
return ecParseSpecialProperty(iprop, val);
break;
default:
return ecBadTable;
}
return ecOK;
}
//
// %%Function: ecParseSpecialProperty
//
// Set a property that requires code to evaluate.
//
int
ecParseSpecialProperty(IPROP iprop, int val)
{
switch (iprop)
{
case ipropPard:
memset(&pap, 0, sizeof(pap));
return ecOK;
case ipropPlain:
memset(&chp, 0, sizeof(chp));
return ecOK;
case ipropSectd:
memset(&sep, 0, sizeof(sep));
return ecOK;
default:
return ecBadTable;
}
return ecBadTable;
}
//
// %%Function: ecTranslateKeyword.
//
// Step 3.
// Search rgsymRtf for szKeyword and evaluate it appropriately.
//
// Inputs:
// szKeyword: The RTF control to evaluate.
// param: The parameter of the RTF control.
// fParam: fTrue if the control had a parameter; (that is, if param is valid)
// fFalse if it did not.
//
int
ecTranslateKeyword(char *szKeyword, int param, bool fParam)
{
int isym;
// search for szKeyword in rgsymRtf
for (isym = 0; isym < isymMax; isym++)
if (strcmp(szKeyword, rgsymRtf[isym].szKeyword) == 0)
break;
if (isym == isymMax) // control word not found
{
if (fSkipDestIfUnk) // if this is a new destination
rds = rdsSkip; // skip the destination
// else just discard it
fSkipDestIfUnk = fFalse;
return ecOK;
}
// found it! use kwd and idx to determine what to do with it.
fSkipDestIfUnk = fFalse;
switch (rgsymRtf[isym].kwd)
{
case kwdProp:
if (rgsymRtf[isym].fPassDflt || !fParam)
param = rgsymRtf[isym].dflt;
return ecApplyPropChange(rgsymRtf[isym].idx, param);
case kwdChar:
return ecParseChar(rgsymRtf[isym].idx);
case kwdDest:
return ecChangeDest(rgsymRtf[isym].idx);
case kwdSpec:
return ecParseSpecialKeyword(rgsymRtf[isym].idx);
default:
return ecBadTable;
}
return ecBadTable;
}
//
// %%Function: ecChangeDest
//
// Change to the destination specified by idest.
// There's usually more to do here than this...
//
int
ecChangeDest(IDEST idest)
{
if (rds == rdsSkip) // if we're skipping text,
return ecOK; // don't do anything
switch (idest)
{
default:
rds = rdsSkip; // when in doubt, skip it...
break;
}
return ecOK;
}
//
// %%Function: ecEndGroupAction
//
// The destination specified by rds is coming to a close.
// If there's any cleanup that needs to be done, do it now.
//
int
ecEndGroupAction(RDS rds)
{
return ecOK;
}
//
// %%Function: ecParseSpecialKeyword
//
// Evaluate an RTF control that needs special processing.
//
int
ecParseSpecialKeyword(IPFN ipfn)
{
if (rds == rdsSkip && ipfn != ipfnBin) // if we're skipping, and it's not
return ecOK; // the \bin keyword, ignore it.
switch (ipfn)
{
case ipfnBin:
ris = risBin;
cbBin = lParam;
break;
case ipfnSkipDest:
fSkipDestIfUnk = fTrue;
break;
case ipfnHex:
ris = risHex;
break;
default:
return ecBadTable;
}
return ecOK;
}