// Appendix B

//

// Sample code to read the text out of a PowerPoint '97 presentation.

//

#include <ole2.h>

#include <stdio.h>

#include <time.h>

// Stolen from app\sertypes.h

// System dependent sizes.
// NOTE(review): the byte widths quoted below are what this file assumes; they
// hold where `long` is 4 bytes and `short` is 2 bytes - confirm for the target.

typedef signed long sint4; // signed 4-byte integral value

typedef signed short sint2; // signed 2-byte integral value

typedef unsigned long uint4; // unsigned 4-byte integral value

typedef unsigned short uint2; // unsigned 2-byte integral value

typedef char bool1; // 1-byte boolean

typedef unsigned char ubyte1; // unsigned byte value

// Type of RecordHeader::recType.
typedef uint2 psrType;

// Type of RecordHeader::recLen. Each record is preceded by a psrType and a
// psrSize (see struct RecordHeader).
typedef uint4 psrSize;

// Type of the RecordHeader::recInstance bit-field.
typedef uint2 psrInstance;

// Type of the RecordHeader::recVer bit-field.
typedef uint2 psrVersion;

typedef uint4 psrReference; // Saved object reference

// If the version field (RecordHeader::recVer) of a record header takes on this
// value, the record header marks the start of a container.
// recVer is declared as a 4-bit bit-field, so it can only hold 0x0..0xF; the
// value must therefore fit in 4 bits (0xFF could never compare equal to it).
#define PSFLAG_CONTAINER 0x0F

// PowerPoint97 Record Header: see struct RecordHeader further down.

// DWord is used throughout this file for stream offsets, record counts and
// the words of the persist directory tables.
typedef unsigned long DWord;

// Reports a failed Assert() on stderr instead of swallowing it silently.
//   file, line - source location of the failing Assert (__FILE__ / __LINE__)
//   expr       - text of the expression that evaluated to false
// Always returns TRUE. Execution continues after the report; nothing aborts.
int AssertionFailed( const char* file, int line, const char* expr )
/*=================*/
{
    fprintf( stderr, "Assertion failed: %s (%s, line %d)\n",
             expr ? expr : "?", file ? file : "?", line );
    return( TRUE );
} /* AssertionFailed */

// Assert( expr ): calls AssertionFailed() with the source location and the
// text of expr when expr evaluates to false.
// Wrapped in do { } while( 0 ) so that `Assert( x );` is a single statement
// and is safe inside an unbraced if/else. expr is tested directly (no cast to
// int) so that pointer expressions are not truncated.
#define Assert( expr ) \
    do { \
        if( !(expr) ) \
            AssertionFailed( __FILE__, __LINE__, #expr ); \
    } while( 0 ) /* Assert */

// Forward declaration of the file-level ReadText() helper defined near the end
// of this file (the signature now matches that definition).
// Fills buffer with up to bufferSize characters of text from the PowerPoint
// storage pStgFrom; the number of characters written is stored in *pSizeRet.
// *ppContext must be NULL on the first call; it carries the reader state
// between calls and is reset to NULL when the reader has been destroyed.
// Returns TRUE if more text exists.
static BOOL ReadText( void** ppContext, IStorage* pStgFrom, WCHAR* buffer, unsigned long bufferSize, unsigned long* pSizeRet );

// Header that precedes every record in the streams read by this file.
// The struct is filled directly from the stream with Read( &rh, sizeof(rh) ),
// so member order and widths matter.
struct RecordHeader

{

// 4-bit version field; the parsers treat a value with all four bits set as
// the start of a container.
psrVersion recVer : 4; // may be PSFLAG_CONTAINER

// 12-bit instance field; not inspected anywhere in this file.
psrInstance recInstance : 12;

// Record type, compared against the PST_* constants.
psrType recType;

// Length in bytes of the data that follows the header (the parsers add
// sizeof( RecordHeader ) separately when accounting for an atom).
psrSize recLen;

};

// Body of the record stored in the "Current User" stream. It is read raw with
// Read( &m_currentUser, sizeof(...) ) by FileReader::ReadCurrentUser(), which
// then validates size, magic and lenUserName.
struct PSR_CurrentUserAtom

{

// Must equal sizeof( PSR_CurrentUserAtom ) for the file to be accepted.
uint4 size;

uint4 magic; // Magic number to ensure this is a PowerPoint file.

uint4 offsetToCurrentEdit; // Offset in main stream to current edit field.

// Must be <= 255 for the file to be accepted.
uint2 lenUserName;

uint2 docFileVersion;

ubyte1 majorVersion;

ubyte1 minorVersion;

};

// Body of a PST_UserEditAtom record, read raw from the document stream by
// FileReader::PPSReadUserEditAtom().
struct PSR_UserEditAtom

{

sint4 lastSlideID; // slideID

uint4 version; // This is major/minor/build which did the edit

// ReadPersistDirectory() follows this offset to the previous edit and stops
// when it is 0.
uint4 offsetLastEdit; // File offset of last edit

uint4 offsetPersistDirectory; // Offset to PersistPtrs for

// this file version.

// Persist reference of the document; resolved through the persist directory
// to find where parsing starts.
uint4 documentRef;

uint4 maxPersistWritten; // Addr of last persist ref written to the file (max seen so far).

sint2 lastViewType; // enum view type

};

// Body of a PST_SlidePersistAtom record, read raw from the document stream by
// FileReader::ParseForSlideLists().
struct PSR_SlidePersistAtom

{

// Persist reference of the slide; this is the only field this file uses
// (it is handed to AddSlideToList()).
uint4 psrReference;

uint4 flags;

sint4 numberTexts;

sint4 slideId;

uint4 reserved;

};

// Name of the stream opened by the FileReader constructor to identify the file.
#define CURRENT_USER_STREAM L"Current User"

// Name of the stream opened by FileReader::GetDocStream().
#define DOCUMENT_STREAM L"PowerPoint Document"

// Expected value of PSR_CurrentUserAtom::magic (0xE391C05F when viewed as an
// unsigned 32-bit value).
#define HEADER_MAGIC_NUM -476987297

// Record types (RecordHeader::recType) recognised by this reader.

const int PST_UserEditAtom = 4085;

const int PST_PersistPtrIncrementalBlock = 6002; // Incremental diffs on persists

const int PST_SlidePersistAtom = 1011;

const int PST_TextCharsAtom = 4000; // Unicode in text

const int PST_TextBytesAtom = 4008; // non-unicode text

// Defined further down; FileReader only holds a pointer to it.
class PPSPersistDirectory;

// One level of the container stack used by FileReader::Parse().
struct ParseContext
{
    RecordHeader  m_rh;     // header of the container being parsed (set by the caller)
    uint4         m_nCur;   // bytes of the container accounted for so far
    ParseContext* m_pNext;  // enclosing container, or NULL for the outermost one

    // Starts a fresh level (nothing consumed yet) on top of pNext.
    ParseContext(ParseContext *pNext)
        : m_nCur(0),
          m_pNext(pNext)
    {
    }
};

// Number of slide references stored per SlideListChunk.
const int SLIDELISTCHUNKSIZE=32;

// Node of a singly linked list; each node stores up to SLIDELISTCHUNKSIZE
// slide persist references.
struct SlideListChunk
{
    SlideListChunk *pNext;                     // following chunk, or NULL
    DWord           numInChunk;                // number of valid entries in refs
    psrReference    refs[SLIDELISTCHUNKSIZE];  // the stored references

    // Creates a chunk that already holds newOne and links to next.
    SlideListChunk( SlideListChunk* next, psrReference newOne )
        : pNext( next ),
          numInChunk( 1 )
    {
        refs[0] = newOne;
    }
};

// Reads the text out of a PowerPoint '97 file that has been opened as an OLE
// storage. Typical use (see the file-level ReadText() helper): construct,
// check IsPowerPoint(), call ReadPersistDirectory() and ReadSlideList() once,
// then call ReadText() repeatedly until it returns FALSE.
class FileReader

{

public:

// Takes a reference on pStg (AddRef) and reads the "Current User" stream to
// decide whether the storage is a PowerPoint file.
FileReader(IStorage *pStg);

// Releases the reference on the storage taken by the constructor.
~FileReader();

BOOL ReadText( WCHAR *pBuff, ULONG size, ULONG *pSizeRet );

// Reads next size chars from file. Returns TRUE if there is more

// text to read.

BOOL IsPowerPoint() { return m_isPP; } // Returns true if this is a PowerPoint '97 file.

// Walks the chain of user edit atoms and records each edit's persist
// directory block. Does nothing if it has already run.
void ReadPersistDirectory();

// Seeks to offset in the document stream and reads a record header followed
// by a PSR_UserEditAtom into userEdit.
void PPSReadUserEditAtom( DWord offset, PSR_UserEditAtom& userEdit );

// Seeks to the document record and collects the slide references found in it.
void ReadSlideList();

protected:

// Reads and validates the current-user record from pStm; TRUE if it looks
// like a PowerPoint file.
BOOL ReadCurrentUser(IStream *pStm);

// Reads a record header into rh and, for atoms, returns a newly allocated
// buffer holding the record body (release with ReleaseRecord()).
void *ReadRecord( RecordHeader& rh );

// Continues parsing the current container stack; returns TRUE when the
// client buffer has been filled.
BOOL Parse();

// Returns the "PowerPoint Document" stream, opening it on first use;
// NULL if the file is not PowerPoint or the stream cannot be opened.
IStream *GetDocStream();

// Always FALSE in this sample: ReadRecord() reads every record itself.
BOOL DoesClientRead( psrType type ) { return FALSE; }

// Frees a buffer returned by ReadRecord() and marks rh as consumed.
void ReleaseRecord( RecordHeader& rh, void* diskRecBuf );

// Recursively parses the record at the current stream position, adding every
// slide persist reference found; returns a byte count used by the recursion.
DWord ParseForSlideLists();

// Stores refToAdd in the slide list.
void AddSlideToList( psrReference refToAdd );

// Seeks to offset, starts a new container stack there and calls Parse().
BOOL StartParse( DWord offset );

// Copies pending text into the client buffer; TRUE if the buffer is now full.
BOOL FillBufferWithText();

// Produces the stream offset of the next thing to parse (the document first,
// then each slide); FALSE when there are no more.
BOOL FindNextSlide( DWord& offset );

private:

// Contents of the "Current User" stream record.
PSR_CurrentUserAtom m_currentUser;

// "PowerPoint Document" stream, opened lazily by GetDocStream().
IStream * m_pDocStream;

// The storage passed to the constructor (a reference is held).
IStorage * m_pPowerPointStg;

// TRUE once the current-user record has been validated.
BOOL m_isPP;

// Top of the container stack used by Parse(); NULL when no parse is active.
ParseContext* m_pParseContexts;

// Text of the most recently read text atom that has not been fully handed to
// the client yet, with the read position and length in characters.
WCHAR* m_pCurText;

unsigned long m_curTextPos;

unsigned long m_curTextLength;

// Copy of the most recent user edit atom; NULL until ReadPersistDirectory().
PSR_UserEditAtom* m_pLastUserEdit;

// Persist directory built by ReadPersistDirectory().
PPSPersistDirectory* m_pPersistDirectory;

// Head of the slide reference list built by AddSlideToList().
SlideListChunk* m_pFirstChunk;

// Counter used by FindNextSlide(); 0 means the document has not been
// returned yet.
int m_curSlideNum;

// Client buffer, its capacity and fill position for the ReadText() call in
// progress, plus the client's "characters written" counter.
WCHAR* m_pClientBuf;

unsigned long m_clientBufSize;

unsigned long m_clientBufPos;

ULONG* m_pSizeRet;

};

// Binds the reader to the storage pStg (a reference is taken with AddRef) and
// reads the "Current User" stream; m_isPP becomes TRUE only if that stream can
// be opened and ReadCurrentUser() accepts its contents.
// pStg must not be NULL.
// Every member is initialised (in declaration order), including
// m_curTextLength and m_pSizeRet which FillBufferWithText() reads.
FileReader::FileReader(IStorage *pStg) :
    m_currentUser(),
    m_pDocStream( NULL ),
    m_pPowerPointStg( pStg ),
    m_isPP( FALSE ),
    m_pParseContexts( NULL ),
    m_pCurText( NULL ),
    m_curTextPos( 0 ),
    m_curTextLength( 0 ),
    m_pLastUserEdit( NULL ),
    m_pPersistDirectory( NULL ),
    m_pFirstChunk( NULL ),
    m_curSlideNum( 0 ),
    m_pClientBuf( NULL ),
    m_clientBufSize( 0 ),
    m_clientBufPos( 0 ),
    m_pSizeRet( NULL )
{
    IStream *pStm = NULL;

    m_pPowerPointStg->AddRef();

    HRESULT hr = pStg->OpenStream( CURRENT_USER_STREAM, NULL, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, NULL, &pStm );
    if( SUCCEEDED(hr) && ReadCurrentUser(pStm) )
        m_isPP = TRUE;

    // pStm stays NULL when OpenStream fails, so only release a real stream.
    if( pStm != NULL )
        pStm->Release();
}

// Frees everything the reader allocated and drops its COM references:
// pending text, the parse-context stack, the slide list, the saved user edit
// atom, the document stream and the storage reference taken in the constructor.
FileReader::~FileReader()
{
    delete [] m_pCurText;
    m_pCurText = NULL;

    while( m_pParseContexts != NULL )
    {
        ParseContext* pContext = m_pParseContexts;
        m_pParseContexts = pContext->m_pNext;
        delete pContext;
    }

    while( m_pFirstChunk != NULL )
    {
        SlideListChunk* pChunk = m_pFirstChunk;
        m_pFirstChunk = pChunk->pNext;
        delete pChunk;
    }

    delete m_pLastUserEdit;
    m_pLastUserEdit = NULL;

    // NOTE(review): m_pPersistDirectory is deliberately not deleted here.
    // PPSPersistDirectory is only forward-declared at this point in the file,
    // and deleting a pointer to an incomplete type would skip its destructor.
    // Move this destructor below the PPSPersistDirectory definition (or move
    // that class up) and add `delete m_pPersistDirectory;` to close the leak.

    if( m_pDocStream != NULL )
    {
        m_pDocStream->Release();
        m_pDocStream = NULL;
    }

    if( m_pPowerPointStg != NULL )
        m_pPowerPointStg->Release();
}

// Moves as much pending text (m_pCurText) as fits into the client's buffer,
// advances both positions and adds the amount moved to *m_pSizeRet. Once all
// pending text has been handed over, the text buffer is freed and reset.
// Returns TRUE when the client's buffer is full.
BOOL FileReader::FillBufferWithText()
{
    const unsigned long textLeft = m_curTextLength - m_curTextPos;
    const unsigned long roomLeft = m_clientBufSize - m_clientBufPos;
    const unsigned long copied   = ( textLeft < roomLeft ) ? textLeft : roomLeft;

    for( unsigned long i = 0; i < copied; ++i )
        m_pClientBuf[ m_clientBufPos + i ] = m_pCurText[ m_curTextPos + i ];
    m_clientBufPos += copied;
    m_curTextPos   += copied;

    if( m_curTextLength == m_curTextPos )
    {
        // Everything consumed: drop the text buffer.
        delete [] m_pCurText;
        m_pCurText      = NULL;
        m_curTextPos    = 0;
        m_curTextLength = 0;
    }

    *m_pSizeRet += copied;

    return ( m_clientBufPos == m_clientBufSize );
}

// Appends refToAdd to the slide list.
// The list is a chain of SlideListChunk nodes that FindNextSlide() walks from
// m_pFirstChunk onwards, so new references (and new chunks) are added at the
// tail. Prepending new chunks would make lists longer than
// SLIDELISTCHUNKSIZE come back with the newest chunk first.
void FileReader::AddSlideToList( psrReference refToAdd )
{
    if( m_pFirstChunk == NULL )
    {
        m_pFirstChunk = new SlideListChunk( NULL, refToAdd );
        return;
    }

    // Only the last chunk can have free room.
    SlideListChunk* pLast = m_pFirstChunk;
    while( pLast->pNext != NULL )
        pLast = pLast->pNext;

    if( pLast->numInChunk < (DWord)SLIDELISTCHUNKSIZE )
    {
        pLast->refs[pLast->numInChunk] = refToAdd;
        pLast->numInChunk++;
    }
    else
    {
        pLast->pNext = new SlideListChunk( NULL, refToAdd );
    }
}

// Returns the "PowerPoint Document" stream, opening it the first time it is
// needed. Returns NULL if the storage was not recognised as PowerPoint or if
// the stream cannot be opened (an error is printed in the latter case).
IStream *FileReader::GetDocStream()
{
    // Already open: hand back the cached stream.
    if( m_pDocStream != NULL )
        return m_pDocStream;

    if( !m_isPP )
        return NULL;

    const HRESULT hr = m_pPowerPointStg->OpenStream( DOCUMENT_STREAM, NULL, STGM_READ | STGM_DIRECT | STGM_SHARE_EXCLUSIVE, NULL, &m_pDocStream );
    if( SUCCEEDED(hr) )
        return m_pDocStream;

    fprintf(stderr,"Error (%d) opening PowerPoint Document Stream.\n",(int)hr);
    return NULL;
}

// Reads the record header and the PSR_CurrentUserAtom from pStm into
// m_currentUser and checks that it describes a PowerPoint file.
// Returns TRUE only if both reads succeed with the full byte count and the
// atom's size, magic number and user-name length are acceptable; otherwise
// FALSE (m_currentUser is never judged on partially read data).
BOOL FileReader::ReadCurrentUser(IStream *pStm)
{
    if( pStm == NULL )
        return FALSE;

    ULONG nRd = 0;
    RecordHeader rh;
    if( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
        return FALSE;

    nRd = 0;
    if( FAILED( pStm->Read(&m_currentUser, sizeof(PSR_CurrentUserAtom), &nRd) ) ||
        nRd != sizeof(PSR_CurrentUserAtom) )
        return FALSE;

    return ( m_currentUser.size == sizeof( m_currentUser ) )&&
           ( m_currentUser.magic == HEADER_MAGIC_NUM )&&
           ( m_currentUser.lenUserName <= 255 );
}

// One persist-directory table, kept in a singly linked list owned by
// PPSPersistDirectory (the only class allowed to create or inspect entries).
class PPSDirEntry
{
    // Creates an empty, unlinked entry.
    PPSDirEntry()
        : m_pNext( NULL ), m_pOffsets( NULL ), m_tableSize( 0 ){}

    PPSDirEntry* m_pNext;      // next entry in the directory, or NULL
    DWord*       m_pOffsets;   // table words, allocated with new DWord[]
    DWord        m_tableSize;  // number of DWords in m_pOffsets

public:
    // m_pOffsets is allocated with new[] (see PPSPersistDirectory::AddEntry),
    // so it must be released with delete[].
    ~PPSDirEntry(){ delete [] m_pOffsets; m_pOffsets = NULL; }

    friend class PPSPersistDirectory;
}; // class PPSDirEntry

// Maps persist references to stream offsets using the tables collected from
// each edit of the file.
class PPSPersistDirectory
{
    // Head of the list of tables, in the order they were added.
    PPSDirEntry* m_pFirstDirEntry;

public:
    PPSPersistDirectory();
    ~PPSPersistDirectory();

    // Copies the cOffsets DWords at pOffsets into a new table at the end of
    // the list.
    void AddEntry( DWord cOffsets, DWord* pOffsets );

    // Returns the stream offset stored for ref, or (DWord)-1 if no table
    // contains it.
    DWord GetPersistObjStreamPos( DWord ref );

    // Returns a count of the references described by the tables.
    DWord NumberOfAlreadySavedPersists();
};

// Creates a directory with no tables.
PPSPersistDirectory::PPSPersistDirectory()
    : m_pFirstDirEntry( NULL )
{
}

// Destroys every table in the list.
PPSPersistDirectory::~PPSPersistDirectory()
{
    PPSDirEntry* pCur = m_pFirstDirEntry;
    while( pCur != NULL )
    {
        PPSDirEntry* pFollowing = pCur->m_pNext;
        delete pCur;
        pCur = pFollowing;
    }
    m_pFirstDirEntry = NULL;
}

// Copies a table of cOffsets DWords from pOffsets and appends it to the end
// of the entry list. The caller keeps ownership of pOffsets.
// A NULL pOffsets or an empty table is ignored: there is nothing to copy, and
// an empty table could never satisfy a lookup.
void PPSPersistDirectory::AddEntry( DWord cOffsets, DWord* pOffsets )
{
    if( pOffsets == NULL || cOffsets == 0 )
        return;

    PPSDirEntry* pDirEntry = new PPSDirEntry();
    pDirEntry->m_tableSize = cOffsets;
    pDirEntry->m_pOffsets = new DWord[cOffsets];
    memcpy( pDirEntry->m_pOffsets, pOffsets, cOffsets * sizeof( DWord ) );

    // append to the end of the entry list
    PPSDirEntry** ppDirEntry = &m_pFirstDirEntry;
    while( NULL != *ppDirEntry )
        ppDirEntry = &(*ppDirEntry)->m_pNext;
    *ppDirEntry = pDirEntry;
}

// Looks up the stream offset recorded for persist reference ref.
// Each table is a sequence of blocks: one header word (reference count in the
// bits above bit 20, first reference number in the low 20 bits) followed by
// that many offsets. Tables are searched in list order and the first match
// wins. Returns (DWord)-1 if ref is not found.
// A block whose count runs past the end of its table is treated as the end of
// that table, so a corrupt count cannot cause a read outside the table.
DWord PPSPersistDirectory::GetPersistObjStreamPos( DWord ref )
{
    for( PPSDirEntry* pEntry = m_pFirstDirEntry; pEntry != NULL; pEntry = pEntry->m_pNext )
    {
        const DWord* pTable = pEntry->m_pOffsets;
        const DWord  cWords = pEntry->m_tableSize;

        DWord i = 0;
        while( i < cWords )
        {
            const DWord nRefs = pTable[i] >> 20;
            const DWord base = pTable[i] & 0xFFFFF; // 1-based

            if( nRefs > cWords - i - 1 )
                break; // truncated block: stop scanning this table

            if( ( base <= ref )&&( ref - base < nRefs ) )
                return pTable[ i + 1 + ( ref - base ) ];

            i += nRefs + 1;
        }
    }
    return (DWord) -1;
}

// Returns the sum of the reference counts of every block in every table.
// Blocks are walked by word index: one header word (count in the bits above
// bit 20) followed by that many offsets. The previous loop condition
// subtracted the pointers the wrong way round and compared an element count
// with a byte count, so it stopped after the first block of each table.
// A block whose count runs past the end of its table ends that table's scan.
DWord PPSPersistDirectory::NumberOfAlreadySavedPersists()
{
    DWord count = 0;

    for( PPSDirEntry* pEntry = m_pFirstDirEntry; pEntry != NULL; pEntry = pEntry->m_pNext )
    {
        const DWord* pTable = pEntry->m_pOffsets;
        const DWord  cWords = pEntry->m_tableSize;

        DWord i = 0;
        while( i < cWords )
        {
            const DWord nRefs = pTable[i] >> 20;
            if( nRefs > cWords - i - 1 )
                break; // truncated block

            count += nRefs;
            i += nRefs + 1;
        }
    }
    return count;
}

// Reads the user edit atom located at offset in the document stream.
//   offset   - position of the atom's record header in the stream
//   userEdit - receives the atom. It is zero-filled first and stays zero if
//              the stream is unavailable or the seek/reads fail, so a caller
//              following offsetLastEdit sees 0 (end of chain) on failure.
void FileReader::PPSReadUserEditAtom( DWord offset, PSR_UserEditAtom& userEdit )
{
    userEdit = PSR_UserEditAtom();

    IStream* pStm = GetDocStream();
    if( pStm == NULL )
        return;

    LARGE_INTEGER li;
    li.LowPart = offset;
    li.HighPart = 0;
    if( FAILED( pStm->Seek(li,STREAM_SEEK_SET, NULL) ) )
        return;

    RecordHeader rh;
    ULONG nRd = 0;
    if( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
        return;

    Assert( rh.recType == PST_UserEditAtom );
    Assert( rh.recLen == sizeof( PSR_UserEditAtom ) );

    // Read into a scratch copy so a short read never leaves userEdit half filled.
    PSR_UserEditAtom atom;
    nRd = 0;
    if( FAILED( pStm->Read(&atom, sizeof(atom), &nRd) ) || nRd != sizeof(atom) )
        return;

    userEdit = atom;
}

// Reads the record at the current position of the document stream.
// Return values:
//   non-NULL: buffer of rh.recLen bytes holding the record body. The caller
//             must hand it back to ReleaseRecord().
//   NULL and rh.recVer == PSFLAG_CONTAINER: no record was read in;
//             record header indicated start of container.
//   NULL and DoesClientRead( rh.recType ): client must read in record.
//   NULL and rh.recType == 0 && rh.recLen == 0: the stream is unavailable or a
//             read failed or came up short; rh has been zeroed.
// NOTE: ByteSwapping & versioning not done by this simple reader.
void *FileReader::ReadRecord( RecordHeader& rh )
{
    IStream *pStm = GetDocStream();

    // read record header, verify
    ULONG nRd = 0;
    if( pStm == NULL ||
        FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
    {
        rh = RecordHeader();
        return NULL;
    }

    // if client will read, do not read in record
    if( DoesClientRead( rh.recType ) )
        return NULL;

    // If container, return NULL
    if(rh.recVer == PSFLAG_CONTAINER)
        return NULL;

    // Allocate buffer for disk record. Client must call ReleaseRecord().
    char* buffer = new char[rh.recLen];

    // read in record
    nRd = 0;
    if( FAILED( pStm->Read(buffer, rh.recLen, &nRd) ) || nRd != rh.recLen )
    {
        delete [] buffer;
        rh = RecordHeader();
        return NULL;
    }

    return (buffer);
}

// Frees a buffer obtained from ReadRecord() and marks the record as consumed.
// The buffer is only freed if the record has not been consumed already
// (recType != 0) and is not a container.
void FileReader::ReleaseRecord( RecordHeader& rh, void* diskRecBuf )
{
    const bool ownsBuffer = ( rh.recType != 0 ) && ( rh.recVer != PSFLAG_CONTAINER );
    if( ownsBuffer )
        delete [] static_cast<char*>( diskRecBuf );

    // Zero the type so the record doesn't get processed again.
    rh.recType = 0;
}

void FileReader::ReadPersistDirectory()

{

if( NULL != m_pLastUserEdit )

return; // already read

PSR_UserEditAtom userEdit;

DWord offsetToEdit = m_currentUser.offsetToCurrentEdit;

while( 0 < offsetToEdit )

{

PPSReadUserEditAtom( offsetToEdit, userEdit );

if( NULL == m_pLastUserEdit )

{

m_pPersistDirectory = new PPSPersistDirectory();

m_pLastUserEdit = new PSR_UserEditAtom;

*m_pLastUserEdit = userEdit;

}

LARGE_INTEGER li;

li.LowPart = userEdit.offsetPersistDirectory;

li.HighPart = 0;

GetDocStream()->Seek(li,STREAM_SEEK_SET, NULL); // AR: check that seek succeeded.

RecordHeader rh;

DWord *pDiskRecord = (DWord*) ReadRecord(rh);

Assert( PST_PersistPtrIncrementalBlock == rh.recType );

m_pPersistDirectory->AddEntry( rh.recLen / sizeof( DWord ), pDiskRecord );

ReleaseRecord( rh, pDiskRecord );

offsetToEdit = userEdit.offsetLastEdit;

}

} // PPStorage::ReadPersistDirectory

void FileReader::ReadSlideList()

{

Assert( m_pLastUserEdit != NULL );

DWord offsetToDoc = m_pPersistDirectory->GetPersistObjStreamPos( m_pLastUserEdit->documentRef );

LARGE_INTEGER li;

li.LowPart = offsetToDoc;

li.HighPart = 0;

GetDocStream()->Seek(li,STREAM_SEEK_SET, NULL);

ParseForSlideLists();

}

// Parses one record at the current position of the document stream, recursing
// into containers, and adds the reference of every PST_SlidePersistAtom found
// to the slide list.
// Returns the number of stream bytes the record occupies (header included),
// or 0 if the stream is unavailable or a read/seek failed. A 0 from a child
// is passed straight up so that a truncated file ends the parse instead of
// looping.
// The byte count for a slide persist atom is header + body; previously the
// header size was lost when the body read overwrote the counter, so
// containers were left early and later slides could be missed.
DWord FileReader::ParseForSlideLists()
{
    IStream *pStm = GetDocStream();
    if( pStm == NULL )
        return 0;

    RecordHeader rh;
    ULONG nRd = 0;
    if( FAILED( pStm->Read(&rh, sizeof(rh), &nRd) ) || nRd != sizeof(rh) )
        return 0;

    const DWord headerBytes = sizeof(rh);
    const bool isAtom = ( rh.recVer != PSFLAG_CONTAINER ) && ( (rh.recVer & 0x0F)!=0x0F );

    if( !isAtom )
    {
        // Container: parse children until its declared length is used up.
        DWord nCur = 0;
        while( nCur < rh.recLen )
        {
            const DWord nChild = ParseForSlideLists();
            if( nChild == 0 )
                return 0;
            nCur += nChild;
        }
        return headerBytes + nCur;
    }

    if( rh.recType == PST_SlidePersistAtom && rh.recLen == sizeof(PSR_SlidePersistAtom) )
    {
        PSR_SlidePersistAtom spa;
        nRd = 0;
        if( FAILED( pStm->Read(&spa, sizeof(spa), &nRd) ) || nRd != sizeof(spa) )
            return 0;
        AddSlideToList( spa.psrReference );
    }
    else
    {
        // A slide persist atom of unexpected size is skipped like any other atom.
        Assert( rh.recType != PST_SlidePersistAtom );

        LARGE_INTEGER li;
        li.LowPart = rh.recLen;
        li.HighPart = 0;
        if( FAILED( pStm->Seek(li,STREAM_SEEK_CUR, NULL) ) )
            return 0;
    }

    return headerBytes + rh.recLen;
}

// Fills pBuff with up to size characters of text and stores the number of
// characters written in *pSizeRet.
// Returns TRUE as soon as the buffer has been filled (more text may follow);
// returns FALSE once there are no more slides to parse.
BOOL FileReader::ReadText( WCHAR *pBuff, ULONG size, ULONG *pSizeRet )
{
    // Remember the client's buffer for FillBufferWithText().
    *pSizeRet = 0;
    m_pSizeRet = pSizeRet;
    m_pClientBuf = pBuff;
    m_clientBufSize = size;
    m_clientBufPos = 0;

    DWord offset;
    for( ;; )
    {
        if( m_pParseContexts != NULL )
        {
            // A parse is in progress: use existing text first, then restart
            // the parse where we left off.
            if( m_pClientBuf && FillBufferWithText() )
                return TRUE;
            if( Parse() )
                return TRUE;
            continue;
        }

        // No parse in progress: move on to the next slide, if any.
        if( !FindNextSlide(offset) )
            return FALSE; // DONE parsing, no more slides
        if( StartParse( offset ) )
            return TRUE;
    }
}

// Begins parsing the container whose record header is at offset in the
// document stream, then runs Parse() on it.
// Returns what Parse() returns (TRUE when the client buffer is full).
// Returns FALSE without starting a parse (m_pParseContexts stays NULL) if
// offset is the "not found" value (DWord)-1, the stream is unavailable, the
// seek or header read fails, or the record at offset is not a container.
BOOL FileReader::StartParse( DWord offset )
{
    IStream* pStm = GetDocStream();
    if( pStm == NULL || offset == (DWord) -1 )
        return FALSE;

    LARGE_INTEGER li;
    li.LowPart = offset;
    li.HighPart = 0;
    if( FAILED( pStm->Seek(li,STREAM_SEEK_SET, NULL) ) )
        return FALSE;

    RecordHeader rh;
    ULONG nRd = 0;
    if( FAILED( pStm->Read(&rh, sizeof(RecordHeader), &nRd) ) || nRd != sizeof(RecordHeader) )
        return FALSE;

    // Parse() walks the children of a container; an atom has none.
    if( ( rh.recVer & 0x0F ) != 0x0F )
        return FALSE;

    m_pParseContexts = new ParseContext( NULL );
    m_pParseContexts->m_rh = rh;

    return Parse();
}

// Continues parsing the container stack in m_pParseContexts from the current
// position of the document stream, copying the text of every
// PST_TextCharsAtom / PST_TextBytesAtom into the client buffer.
// Returns TRUE when the client buffer is full (call again to resume) and
// FALSE when the outermost container has been completed or the stream could
// not be read; in both FALSE cases m_pParseContexts is NULL afterwards.
//
// Fixes relative to the earlier version:
//   - an atom's size is added to its container before the early "buffer full"
//     return, so a resumed parse no longer runs past the container's end;
//   - a finished child container's size (header + contents) is credited to
//     its parent, which previously never happened;
//   - finished containers are retired in a loop and the stack is re-checked
//     for NULL before use;
//   - header/body reads are checked, and text length follows the bytes read;
//   - an odd-length Unicode atom no longer writes one byte past the buffer;
//   - the debug wprintf no longer reads past the unterminated text buffer.
BOOL FileReader::Parse()
{
    IStream *pStm = GetDocStream();

    Assert( m_pParseContexts );

    for( ;; )
    {
        // Retire every container whose contents are used up. Restarting a
        // parse might complete a container, so this is tested first.
        while( m_pParseContexts != NULL &&
               m_pParseContexts->m_nCur >= m_pParseContexts->m_rh.recLen )
        {
            Assert( m_pParseContexts->m_nCur == m_pParseContexts->m_rh.recLen );
            ParseContext* pDone = m_pParseContexts;
            m_pParseContexts = pDone->m_pNext;
            if( m_pParseContexts != NULL )
                m_pParseContexts->m_nCur += (uint4)sizeof( RecordHeader ) + pDone->m_nCur;
            delete pDone;
        }
        if( m_pParseContexts == NULL )
            return FALSE;

        RecordHeader rh;
        ULONG nRd = 0;
        if( pStm == NULL ||
            FAILED( pStm->Read(&rh, sizeof(RecordHeader), &nRd) ) || nRd != sizeof(RecordHeader) )
        {
            // Nothing more can be read: abandon this parse entirely.
            while( m_pParseContexts != NULL )
            {
                ParseContext* pDrop = m_pParseContexts;
                m_pParseContexts = pDrop->m_pNext;
                delete pDrop;
            }
            return FALSE;
        }

        const bool isAtom = ( rh.recVer != PSFLAG_CONTAINER ) && ( (rh.recVer & 0x0F)!=0x0F );
        if( !isAtom )
        {
            // Container: descend into it.
            m_pParseContexts = new ParseContext( m_pParseContexts );
            m_pParseContexts->m_rh = rh;
            continue;
        }

        // Atom rh's add towards containing container's size. Account for the
        // whole atom now, before any early return below.
        m_pParseContexts->m_nCur += rh.recLen;
        m_pParseContexts->m_nCur += (uint4)sizeof( RecordHeader );

        if( rh.recType == PST_TextCharsAtom )
        {
            const DWord nChars = rh.recLen / sizeof( WCHAR );
            const DWord nBytes = nChars * sizeof( WCHAR );

            Assert( m_pCurText == NULL );
            delete [] m_pCurText; // no leak even if the assertion is violated
            m_pCurText = new WCHAR[nChars];
            m_curTextPos = 0;

            nRd = 0;
            if( FAILED( pStm->Read(m_pCurText, nBytes, &nRd) ) || nRd > nBytes )
                nRd = 0;
            m_curTextLength = nRd / sizeof( WCHAR );

            if( nBytes < rh.recLen )
            {
                // Odd-length record: step over the trailing byte.
                LARGE_INTEGER li;
                li.LowPart = rh.recLen - nBytes;
                li.HighPart = 0;
                pStm->Seek(li,STREAM_SEEK_CUR,NULL);
            }

            // The text is not NUL-terminated, so bound the print by its length.
            wprintf( L"-%.*s-\n", (int)m_curTextLength, m_pCurText );

            if( FillBufferWithText() )
                return TRUE; // Stop parsing if buffer is full, and return control to client
        }
        else if( rh.recType == PST_TextBytesAtom )
        {
            Assert( m_pCurText == NULL );
            delete [] m_pCurText; // no leak even if the assertion is violated
            m_pCurText = new WCHAR[rh.recLen];
            m_curTextPos = 0;

            nRd = 0;
            if( FAILED( pStm->Read(m_pCurText, rh.recLen, &nRd) ) || nRd > rh.recLen )
                nRd = 0;
            m_curTextLength = nRd;

            // Widen the bytes to WCHARs in place, working from the back so no
            // byte is overwritten before it has been read.
            const unsigned char* pBytes = (const unsigned char*) m_pCurText;
            for( DWord i = m_curTextLength; i-- > 0; )
                m_pCurText[i] = (WCHAR) pBytes[i];

            if( FillBufferWithText() )
                return TRUE; // Stop parsing if buffer is full, and return control to client
        }
        else
        {
            // Uninteresting atom: skip its body.
            LARGE_INTEGER li;
            ULARGE_INTEGER ul;
            li.LowPart = rh.recLen;
            li.HighPart = 0;
            pStm->Seek(li,STREAM_SEEK_CUR,&ul);
        }
    }
}

// Produces the stream offset of the next record to parse: the document on the
// first call, then each slide in the slide list in turn.
//   offset - receives the stream offset when TRUE is returned
// Returns FALSE when there is nothing (more) to parse, including when
// ReadPersistDirectory() did not manage to build a directory.
// References that the persist directory cannot resolve are skipped rather
// than being returned as the "not found" offset (DWord)-1.
BOOL FileReader::FindNextSlide( DWord& offset )
{
    Assert( m_pLastUserEdit != NULL );
    if( m_pLastUserEdit == NULL || m_pPersistDirectory == NULL )
        return FALSE;

    for( ;; )
    {
        psrReference ref;

        if( m_curSlideNum == 0 )
        {
            // First call: the document itself.
            ref = m_pLastUserEdit->documentRef;
            m_curSlideNum++;
        }
        else
        {
            // m_curSlideNum is the 1-based index of the slide to return.
            uint4 curSlideNum = m_curSlideNum++;
            SlideListChunk *pCur = m_pFirstChunk;
            while( pCur && ( curSlideNum > pCur->numInChunk ) )
            {
                curSlideNum -= pCur->numInChunk;
                pCur = pCur->pNext;
            }
            if( pCur == NULL )
                return FALSE;
            ref = pCur->refs[curSlideNum-1];
        }

        offset = m_pPersistDirectory->GetPersistObjStreamPos( ref );
        if( offset != (DWord) -1 )
            return TRUE;
        // Unresolvable reference: try the next slide.
    }
}

// Reads the next piece of text from the PowerPoint storage pStgFrom.
//   ppContext  - in/out reader state. Pass a pointer to NULL on the first
//                call; a FileReader is created and stored there. It is reset
//                to NULL whenever the reader has been destroyed.
//   buffer     - receives up to bufferSize characters (not NUL-terminated)
//   pSizeRet   - receives the number of characters written
// Returns TRUE if more text exists. Returns FALSE when all text has been
// returned or the storage is not a PowerPoint file; in both cases the reader
// is deleted and *ppContext is NULL (it is no longer left pointing at a
// deleted reader when the file is rejected).
static BOOL ReadText( void** ppContext, IStorage* pStgFrom, WCHAR* buffer, unsigned long bufferSize, unsigned long* pSizeRet )
{
    FileReader* pFI = (FileReader *)*ppContext;

    if( pFI == NULL )
    {
        pFI = new FileReader( pStgFrom );
        if( !pFI->IsPowerPoint() )
        {
            delete pFI;
            *pSizeRet = 0;
            return FALSE; // *ppContext is still NULL
        }
        pFI->ReadPersistDirectory();
        pFI->ReadSlideList();

        // Publish the reader only once it is known to be usable.
        *ppContext = pFI;
    }

    BOOL bRet = pFI->ReadText(buffer, bufferSize, pSizeRet);
    if( !bRet )
    {
        delete pFI;
        *ppContext = NULL;
    }
    return bRet;
}

// Command-line driver: opens the compound file named by argv[1] and prints
// its text in pieces of up to 5 characters, each wrapped in dashes.
// Returns 0 on success and 1 on a usage error, a file-name conversion error
// or a failure to open the file. The storage is released before returning.
int main(int argc, char **argv)
{
    if (argc < 2)
    {
        fprintf(stderr,"Usage dblock <file to be read>\n");
        return 1;
    }

    // Convert the file name to wide characters; the count includes the
    // terminating NUL, and 0 means the name did not fit or was invalid.
    OLECHAR wc[256];
    if (MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, argv[1], -1, wc, sizeof(wc) / sizeof(wc[0]) ) == 0)
    {
        fprintf(stderr,"Error converting file name: %s\n",argv[1]);
        return 1;
    }

    IStorage *pStgFrom = NULL;
    HRESULT hr = StgOpenStorage(wc, NULL, STGM_READ | STGM_DIRECT |
                                STGM_SHARE_DENY_WRITE, NULL, 0, &pStgFrom);
    if (FAILED(hr))
    {
        fprintf(stderr,"Error (%d) opening docfile: %s\n",(int)hr,argv[1]);
        return 1;
    }

    WCHAR wcBuf[6];            // 5 characters of text + terminating NUL
    ULONG sizeUsed = 0;
    BOOL fContinue = TRUE;
    void *pContext = NULL;     // reader state owned by ReadText()

    while( fContinue )
    {
        fContinue = ReadText( &pContext, pStgFrom, wcBuf, 5, &sizeUsed );
        wcBuf[sizeUsed] = 0;
        wprintf(L"-%s-\n", wcBuf);
    }

    pStgFrom->Release();
    return 0;
}