FILEIO.CPP

//-------------------------------------------------------------------- 
// Microsoft OLE DB
// (C) Copyright 1994 - 1996 Microsoft Corporation. All Rights Reserved.
//
// @doc
//
// @module FILEIO.CPP | This module contains the File Manipulation code
// for a Comma Separated Value (CSV) Simple Provider.
//
//
#include "headers.h"
#include "fileio.h"

// NOTE(review): no use of ARRAY_INIT_SIZE is visible in this part of the
// module; confirm it is still needed before removing it.
static const int ARRAY_INIT_SIZE = 1000;

// Data Types supported (values stored in m_rgswColType)
static const int TYPE_CHAR = 1;
static const int TYPE_SLONG = 3;

// Data Type Parse strings and lengths. Each *_SIZE is the number of
// characters in the matching string, excluding the null terminator, and
// is used as the comparison length when matching type tokens.
static const char CHAR_STRING[] = "CHAR";
static const int CHAR_STRING_SIZE = 4;
static const char SLONG_STRING[] = "SLONG";
static const int SLONG_STRING_SIZE = 5;


//--------------------------------------------------------------------
// @mfunc Constructor. Leaves the object in its "nothing loaded" state:
// no buffers, no columns, no rows and no cached data-type offset.
//
// @rdesc NONE
//
CFileIO::CFileIO()
{
    // Counters and the cached offset of the data-type record
    m_cRows            = 0;
    m_dwColumns        = 0;
    m_ulDataTypeOffset = 0;

    // Buffers are allocated later (fInit and GetColumnName)
    m_pvInput   = NULL;
    m_pColNames = NULL;
}


//--------------------------------------------------------------------
// @mfunc Destructor. Closes the file if it is still open and releases
// the column-name and input buffers.
//
// @rdesc NONE
//
CFileIO::~CFileIO()
{
    // Release the file first
    if (is_open())
    {
        close();
    }

    // Then the heap buffers (deleting a NULL pointer is harmless)
    delete [] m_pColNames;
    delete [] m_pvInput;
}


//--------------------------------------------------------------------
// @mfunc Initialization routine: allocates the input buffer, opens the
// specified file for read/write and builds the column, data type and
// row-offset information.
//
// @rdesc HRESULTs
//      @flag S_OK   | Succeeded
//      @flag E_FAIL | Failed to Initialize (NULL file name, object
//                     already open, allocation failure, file could not
//                     be opened, or the file contents were rejected)
//
HRESULT CFileIO::fInit
    (
    LPSTR ptstrFileName     //@parm IN | File Name to Open
    )
{
    // A NULL name cannot be opened; fail cleanly instead of passing it on
    if (NULL == ptstrFileName)
        return ResultFromScode( E_FAIL );

    // Initializing twice on an open file is not supported
    if (is_open())
        return ResultFromScode( E_FAIL );

    // Allocate Stream Buffer. If an earlier fInit failed after the
    // allocation, reuse that buffer instead of leaking it.
    if (NULL == m_pvInput)
    {
        m_pvInput = new char[MAX_INPUT_BUFFER ];
        if (NULL == m_pvInput)
            return ResultFromScode( E_FAIL );
    }

    // Open the File
    //@DEVNOTE: csv file must not be read only, else E_FAIL will be returned.
    open( ptstrFileName, ios::in | ios::out | ios::nocreate, filebuf::sh_none );
    if (!is_open())
        return ResultFromScode( E_FAIL );

    // Obtain the Column Names, Data Types, and Indexes
    // for each of the rows
    if (FAILED( GenerateFileInfo()))
        return ResultFromScode( E_FAIL );

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Retrieve the Name associated with a particular column.
//
// While the names have not been loaded yet, a call reads the first
// record of the file, caches and tokenizes it, and returns S_FALSE
// without touching pptstrName (GenerateFileInfo relies on this and
// passes NULL). Later calls return a pointer into the cached record.
//
// @rdesc HRESULT
//      @flag S_OK    | Succeeded, *pptstrName points at the name
//      @flag S_FALSE | Column names were loaded by this call
//      @flag E_FAIL  | Invalid Column Number, NULL output pointer, or
//                      the column name record could not be read
//
HRESULT CFileIO::GetColumnName
    (
    DWORD   dwCol,          //@parm IN | Column Number (1-based)
    LPSTR*  pptstrName      //@parm OUT | Pointer to Column Name
    )
{
    // Column number greater than MAX
    if (dwCol > MAX_COLUMNS)
    {
        TRACE( "dwCol > MAX_COLUMNS" );
        return ResultFromScode( E_FAIL );
    }

    // If Column Names have not been retrieved,
    // then retrieve them into the internal array
    if (!m_pColNames)
    {
        // Reset the stream state BEFORE seeking, so that a stale
        // eof/fail state cannot suppress the seek and a failed seek is
        // not silently discarded.
        clear();
        seekg( 0L );

        // Retrieve the column names record
        getline( m_pvInput, MAX_INPUT_BUFFER );
        if (!good() || gcount() <= 0)
        {
            // Invalid Table, first line does not contain
            // column metadata.
            return ResultFromScode( E_FAIL );
        }

        // Keep a private copy of the record. One extra byte is
        // allocated so the copy is always null terminated, whatever
        // the last extracted character was.
        const int cbLine = (int) gcount();
        m_pColNames = new char[cbLine + 1];
        if (NULL == m_pColNames)
            return ResultFromScode( E_FAIL );
        memcpy( m_pColNames, m_pvInput, cbLine );
        m_pColNames[cbLine] = '\0';

        if (FAILED( ParseColumnNames( m_pColNames )))
        {
            // Drop the half-parsed state so a later call starts clean
            delete [] m_pColNames;
            m_pColNames = NULL;
            m_dwColumns = 0;
            return ResultFromScode( E_FAIL );
        }

        return ResultFromScode( S_FALSE );
    }

    // Lookup path: the caller must supply somewhere to put the result
    ASSERT( pptstrName );
    if (NULL == pptstrName)
        return ResultFromScode( E_FAIL );

    // Column numbers are 1-based; 0 and anything past the last column
    // are rejected
    if ((0 == dwCol) || (m_dwColumns < dwCol))
        return ResultFromScode( E_FAIL );

    *pptstrName = m_rgpColNames[dwCol];
    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Tokenize the column names in place. Every comma in the input
// is overwritten with a null terminator and a pointer to the start of
// each name is stored in m_rgpColNames, beginning at index 1.
// m_dwColumns is incremented once per stored name. A comma that is the
// last character of the record does not start another column.
//
// @rdesc HRESULT
//      @flag S_OK   | Parsing yielded no Error
//      @flag E_FAIL | NULL input, or more than MAX_COLUMNS names
//
HRESULT CFileIO::ParseColumnNames
    (
    LPTSTR ptstrInput       //@parm IN/OUT | Column name record, modified in place
    )
{
    ASSERT( ptstrInput );
    if (NULL == ptstrInput)
        return ResultFromScode( E_FAIL );

    LPTSTR pchCur = ptstrInput;

    // Set first column pointer
    if ('\0' != *pchCur)
    {
        // MAX_COLUMNS is the largest column number GetColumnName accepts,
        // so never store a name beyond that index.
        if (m_dwColumns >= MAX_COLUMNS)
            return ResultFromScode( E_FAIL );

        m_rgpColNames[++m_dwColumns] = pchCur;
    }

    // Null Terminate each column
    for (; '\0' != *pchCur; pchCur++)
    {
        if (',' != *pchCur)
            continue;

        // Terminate the name that ends here
        *pchCur = '\0';

        // Anything after the comma starts the next column. Plain pointer
        // arithmetic is used; casting the pointer through ULONG would
        // truncate it where pointers are wider than ULONG.
        if ('\0' != pchCur[1])
        {
            if (m_dwColumns >= MAX_COLUMNS)
                return ResultFromScode( E_FAIL );

            m_rgpColNames[++m_dwColumns] = pchCur + 1;
        }
    }

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Retrieves the columns data characteristics.
//
// While the data types have not been loaded yet (m_ulDataTypeOffset is
// 0), a call skips the first record, remembers the file offset of the
// second record, parses it as the data type record and returns S_FALSE
// without touching the output parameters (GenerateFileInfo relies on
// this and passes NULLs). Later calls return the cached values; any
// output pointer that is NULL is simply skipped.
//
// @rdesc HRESULT
//      @flag S_OK    | Succeeded
//      @flag S_FALSE | Data types were loaded by this call
//      @flag E_FAIL  | Invalid Column Number or unreadable type record
//
HRESULT CFileIO::GetDataTypes
    (
    DWORD   dwCol,          //@parm IN | Column number (1-based)
    SWORD*  pswType,        //@parm OUT | Data Type
    UDWORD* pudwColDef,     //@parm OUT | Precision of the column
    BOOL*   pfSigned        //@parm OUT | Is the columns signed
    )
{
    HRESULT hr;

    // Column number greater than MAX
    if (dwCol > MAX_COLUMNS)
    {
        TRACE( "dwCol > MAX_COLUMNS" );
        return ResultFromScode( E_FAIL );
    }

    // If Data Types have not been retrieved,
    // then retrieve them into the internal array
    if (0 == m_ulDataTypeOffset)
    {
        // Reset the stream state before seeking so a stale error state
        // cannot suppress the seek
        clear();
        seekg( 0L );

        // To retrieve the Column Data Types, we need
        // to skip the first row and get to the 2nd row
        getline( m_pvInput, MAX_INPUT_BUFFER );
        m_ulDataTypeOffset = tellg();
        getline( m_pvInput, MAX_INPUT_BUFFER );

        // Check Stream status
        if (bad() || 0 == gcount())
        {
            // Forget the offset so a failed load is not mistaken for a
            // completed one on the next call
            m_ulDataTypeOffset = 0;
            return ResultFromScode( E_FAIL );
        }

        // Parse the datatypes from the stream.
        hr = ParseDataTypes();
        if (FAILED( hr ))
        {
            m_ulDataTypeOffset = 0;
            return hr;
        }

        return ResultFromScode( S_FALSE );
    }

    // At least one output is expected, but not necessarily all of them
    assert( pswType || pudwColDef || pfSigned );

    // Column numbers are 1-based; 0 and anything past the last column
    // are rejected
    if ((0 == dwCol) || (m_dwColumns < dwCol))
        return ResultFromScode( E_FAIL );

    // Only write through the pointers the caller actually supplied
    if (pswType)
        *pswType = m_rgswColType[dwCol];
    if (pudwColDef)
        *pudwColDef = m_rgudwColSize[dwCol];
    if (pfSigned)
        *pfSigned = m_rgfSigned[dwCol];

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Tokenize the DataTypes and Lengths found in m_pvInput.
//
// The record is a comma separated list with one entry per column.
// Entries are matched case-insensitively on their leading characters:
//      CHAR(n) - character data, stored size is n + 1, unsigned
//      SLONG   - stored size is 4, signed
// Results are written to m_rgswColType, m_rgudwColSize and m_rgfSigned
// starting at index 1.
//
// @rdesc HRESULT
//      @flag S_OK   | Parsing yielded no Error
//      @flag E_FAIL | Empty record, unknown type, CHAR without a valid
//                     "(n)" length, or the number of types differs
//                     from the number of columns
//
HRESULT CFileIO::ParseDataTypes()
{
    ULONG   ulColumn = 0;
    LPSTR   pVal;
    LPSTR   pOpen;
    long    lLength;

    assert( m_pvInput );

    pVal = strtok( m_pvInput, "," );
    if (NULL == pVal)
        return ResultFromScode( E_FAIL );

    while (NULL != pVal)
    {
        // More types than columns can never be valid; stop before
        // writing past the entries that belong to real columns.
        if (ulColumn >= m_dwColumns)
            return ResultFromScode( E_FAIL );

        ulColumn++;

        if (0 == _strnicmp( pVal, CHAR_STRING, CHAR_STRING_SIZE ))
        {
            // A CHAR entry must carry its length as "(n)"
            pOpen = strstr( pVal, "(" );
            if (NULL == pOpen)
                return ResultFromScode( E_FAIL );

            lLength = atol( pOpen + 1 );
            if (lLength < 0)
                return ResultFromScode( E_FAIL );

            m_rgswColType[ulColumn]  = TYPE_CHAR;
            m_rgudwColSize[ulColumn] = lLength + 1;    // +1 as in the stored size convention above
            m_rgfSigned[ulColumn]    = FALSE;
        }
        else if (0 == _strnicmp( pVal, SLONG_STRING, SLONG_STRING_SIZE ))
        {
            m_rgswColType[ulColumn]  = TYPE_SLONG;
            m_rgudwColSize[ulColumn] = 4;
            m_rgfSigned[ulColumn]    = TRUE;
        }
        else
            return ResultFromScode( E_FAIL );

        pVal = strtok( NULL, "," );
    }

    // should have exactly the same number of types as we have columns
    if (ulColumn != m_dwColumns)
        return ResultFromScode( E_FAIL );

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Load the column names and data types, then record the file
// offset at which each row starts. Lines that are empty or begin with
// '@' (the deletion pattern) are not indexed.
//
// Scanning starts at the data type record, so index entry 0 refers to
// that record and the data rows occupy entries 1 .. m_cRows.
//
// NOTE(review): the scan stops as soon as the stream reports eof, so a
// final line that has no line terminator is not indexed - confirm this
// is acceptable for the files this provider is expected to read.
//
// @rdesc HRESULT
//      @flag S_OK   | Got the offsets, Column Names and Data Types
//      @flag E_FAIL | Could not obtain all the necessary info
//
HRESULT CFileIO::GenerateFileInfo()
{
    ULONG   cIndexed = 0;       // number of index entries written
    ULONG   ulLineStart;        // offset of the line about to be read

    // Generate Column Info. S_FALSE is the "names were just loaded"
    // result; anything else means the names could not be read.
    if (S_FALSE != GetColumnName( 0, NULL ))
        return ResultFromScode( E_FAIL );

    // Generate DataType Mapping. S_FALSE is the "types were just
    // loaded" result; anything else means a problem reading them.
    if (S_FALSE != GetDataTypes( 0, NULL, NULL, NULL ))
        return ResultFromScode( E_FAIL );

    // Create and Initialize the Index Array
    if (FALSE == m_FileIdx.fInit())
        return ResultFromScode( E_FAIL );

    // Obtain the starting offset for each row
    clear();
    seekg( m_ulDataTypeOffset );
    ulLineStart = tellg();
    getline( m_pvInput, MAX_INPUT_BUFFER );
    while (good() && !eof())
    {
        // Ignore Deleted Lines and empty lines
        if ('@' != *m_pvInput && '\0' != *m_pvInput)
            m_FileIdx.SetIndex( cIndexed++, ulLineStart );

        ulLineStart = tellg();
        getline( m_pvInput, MAX_INPUT_BUFFER );
    }

    // Store the number of rows. Entry 0 is the data type record, so it
    // is not counted; guard the subtraction so that an empty index
    // yields 0 rows instead of wrapping around.
    m_cRows = (cIndexed > 0) ? (cIndexed - 1) : 0;

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Report whether the index marks the given row as deleted
//
// @rdesc HRESULT
//      @flag S_OK    | Row already deleted
//      @flag S_FALSE | Row not deleted
//
HRESULT CFileIO::IsDeleted
    (
    ULONG ulRow             //@parm IN | Row to Check
    )
{
    // Ask the index and map its answer onto the two result codes
    const BOOL fAlreadyGone = (TRUE == m_FileIdx.IsDeleted( ulRow ));

    return ResultFromScode( fAlreadyGone ? S_OK : S_FALSE );
}


//--------------------------------------------------------------------
// @mfunc Overwrite the row's text in the file with '@' characters, the
// deletion pattern, and set the Deletion status flag in the index
// class. A row that the index already reports as deleted is left
// untouched.
//
// @rdesc HRESULT
//      @flag S_OK   | Deleted Row (or row was already deleted)
//      @flag E_FAIL | Row Number was invalid or problem deleting.
//
HRESULT CFileIO::DeleteRow
    (
    ULONG ulRow             //@parm IN | Row to Delete (1-based)
    )
{
    assert( is_open());
    assert( m_pvInput );

    // Check the Row Number
    if ((ulRow < 1) || (ulRow > m_cRows))
        return ResultFromScode( E_FAIL );

    // If already deleted, just ignore.
    if (TRUE == m_FileIdx.IsDeleted( ulRow ))
        return ResultFromScode( S_OK );

    // Set the File Pointer. The state is cleared BEFORE the seek: a
    // stale eof/fail state must not suppress the seek, and a failed
    // seek must stay visible to the good() test below.
    clear();
    seekg( m_FileIdx.GetRowOffset( ulRow ));

    // Read the row to learn how many characters it occupies
    getline( m_pvInput, MAX_INPUT_BUFFER );
    if (!good())
        return ResultFromScode( E_FAIL );

    // Number of characters extracted minus one, i.e. the row text
    // without its terminator. Read once, before any further stream
    // operation.
    const int cbRowText = (int) gcount() - 1;
    if (cbRowText < 0)
        return ResultFromScode( E_FAIL );

    // Overwrite exactly that many characters with the deletion pattern
    memset( m_pvInput, '@', cbRowText );
    seekp( m_FileIdx.GetRowOffset( ulRow ));
    if (fail())
        return ResultFromScode( E_FAIL );

    write( m_pvInput, cbRowText );
    if (bad())
        return ResultFromScode( E_FAIL );
    flush();

    // Mark the status as deleted in the index Array
    m_FileIdx.DeleteRow( ulRow );

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Remember where the data for a column is to be stored and how
// large a value that area can take
//
// @rdesc HRESULT
//      @flag S_OK   | Binding set
//      @flag E_FAIL | Column number out of range
//
HRESULT CFileIO::SetColumnBind
    (
    DWORD       dwCol,      //@parm IN | Column Number (1-based)
    PCOLUMNDATA pColumn,    //@parm IN | Pointer to the Data Area
    SDWORD      sdwMaxLen   //@parm IN | Maximum size for the value
    )
{
    assert( is_open());
    assert( m_rgpColumnData );
    assert( m_rgsdwMaxLen );

    // Valid column numbers run from 1 through m_dwColumns
    const BOOL fColumnInRange = (0 != dwCol) && !(m_dwColumns < dwCol);
    if (!fColumnInRange)
    {
        return ResultFromScode( E_FAIL );
    }

    // Expect valid pointer
    assert( pColumn );

    // Record the binding for this column
    m_rgsdwMaxLen[dwCol]   = sdwMaxLen;
    m_rgpColumnData[dwCol] = pColumn;

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Fetch the row data from the stream to the internal data
// buffers: position the file at the row's recorded offset, read the
// line into m_pvInput and hand it to ParseRowValues.
//
// @rdesc HRESULT
//      @flag S_OK    | Row Retrieve successfully
//      @flag S_FALSE | End of Result Set
//      @flag E_FAIL  | Row could not be retrieved
//
HRESULT CFileIO::Fetch
    (
    ULONG ulRow             //@parm IN | Row to retrieve (1-based)
    )
{
    assert( is_open());
    assert( m_rgpColumnData );
    assert( m_rgsdwMaxLen );

    // Check the Row Number
    if (ulRow < 1)
        return ResultFromScode( E_FAIL );

    // Check end of Result Set
    if (ulRow > m_cRows)
        return ResultFromScode( S_FALSE );

    // Set the File Pointer to the row. The state is cleared BEFORE the
    // seek: a stale eof/fail state (for example from scanning to the
    // end of the file) must not suppress the seek, and a failed seek
    // must remain visible to the good() test below.
    clear();
    seekg( m_FileIdx.GetRowOffset( ulRow ));

    // Retrieve the row record
    getline( m_pvInput, MAX_INPUT_BUFFER );
    if (good() && 0 < gcount())
    {
        return ParseRowValues();
    }

    return ResultFromScode( E_FAIL );
}


//--------------------------------------------------------------------
// @mfunc Tokenize the Data values held in m_pvInput and put them into
// the correct binding areas through FillBinding.
//
// The record is split in place on commas that are outside double
// quotes. A value is taken to start at its first character that is
// neither a quote nor a separating comma, and is terminated at the
// last quote seen in the field (or at the comma if there was none).
// A field with no such character is passed on as a NULL value.
// NOTE: embedded/escaped quotes ("") are not handled.
//
// @rdesc HRESULT
//      @flag S_OK   | Parsing yielded no Error
//      @flag E_FAIL | Unbalanced quotes, wrong number of values for the
//                     table, or a value could not be stored
//
HRESULT CFileIO::ParseRowValues
    (
    void
    )
{
    DWORD   cColsFound   = 0;           // values handed to FillBinding so far
    DWORD   cQuotes      = 0;           // quotes seen in the current field
    LPTSTR  pchValue     = NULL;        // start of the current value, if any
    LPTSTR  pchLastQuote = NULL;        // most recent quote in the current field
    LPTSTR  pchCur       = m_pvInput;

    assert( pchCur );
    assert( m_dwColumns > 0 );

    for (; '\0' != *pchCur; pchCur++)
    {
        if ('"' == *pchCur)
        {
            // Quotes delimit the value but are not part of it
            pchLastQuote = pchCur;
            cQuotes++;
        }
        else if ((',' == *pchCur) && (0 == cQuotes % 2))
        {
            // Separator outside quotes: terminate the value at its
            // closing quote if it had one, otherwise at the comma
            if (pchLastQuote)
                *pchLastQuote = '\0';
            else
                *pchCur = '\0';

            // A row may not carry more values than the table has
            // columns; refuse before indexing past the bound columns.
            if (++cColsFound > m_dwColumns)
                return ResultFromScode( E_FAIL );

            if (FAILED( FillBinding( cColsFound, pchValue )))
                return ResultFromScode( E_FAIL );

            // Start afresh for the next field
            pchLastQuote = NULL;
            pchValue     = NULL;
            cQuotes      = 0;
        }
        else if (NULL == pchValue)
        {
            // Valid first character for this column
            pchValue = pchCur;
        }

        // Check for Final Null Terminator: the last field has no
        // trailing comma, so it is flushed when the next character is
        // the end of the record. Plain indexing is used rather than
        // casting the pointer through ULONG, which would truncate it
        // where pointers are wider than ULONG.
        if ('\0' == pchCur[1])
        {
            // If we are at the null terminator and have unbalanced "'s
            // then we fail
            if (0 != cQuotes % 2)
                return ResultFromScode( E_FAIL );

            if (pchLastQuote)
                *pchLastQuote = '\0';

            if (++cColsFound > m_dwColumns)
                return ResultFromScode( E_FAIL );

            if (FAILED( FillBinding( cColsFound, pchValue )))
                return ResultFromScode( E_FAIL );
        }
    }

    // Check that we returned the correct number of columns
    if (cColsFound < m_dwColumns)
        return ResultFromScode( E_FAIL );

    return ResultFromScode( S_OK );
}

//--------------------------------------------------------------------
// @mfunc Based on the given bindings and column data, put the data
// in the correct area, update the status and length fields.
//
// A NULL pvCopy marks the column as DBSTATUS_S_ISNULL. CHAR values are
// copied with the column's bound maximum length; the reported length
// is that of the source text plus its terminator. SLONG values are
// converted with atol and reported with length 4.
//
// @rdesc HRESULT
//      @flag S_OK   | Data copied to the specified location
//      @flag E_FAIL | Column number out of range, column has no
//                     binding, or the column's data type is unknown
//
HRESULT CFileIO::FillBinding
    (
    DWORD   dwColumn,       //@parm IN | Column that value is for (1-based)
    LPTSTR  pvCopy          //@parm IN | Pointer to data value to transfer
    )
{
    assert( m_rgswColType );
    assert( m_rgpColumnData );
    assert( m_rgsdwMaxLen );

    // Same range rule as SetColumnBind: 1 through m_dwColumns
    if ((0 == dwColumn) || (m_dwColumns < dwColumn))
        return ResultFromScode( E_FAIL );

    // NOTE(review): this guard only helps if unbound entries of
    // m_rgpColumnData are NULL - their initialization is not visible here.
    PCOLUMNDATA pBinding = m_rgpColumnData[dwColumn];
    if (NULL == pBinding)
        return ResultFromScode( E_FAIL );

    // Null Value
    if (!pvCopy)
    {
        pBinding->dwStatus = DBSTATUS_S_ISNULL;
        return ResultFromScode( S_OK );
    }

    switch (m_rgswColType[dwColumn])
    {
        case TYPE_CHAR:
            lstrcpyn((LPTSTR) pBinding->bData, pvCopy, m_rgsdwMaxLen[dwColumn] );
            pBinding->dwLength = lstrlen( pvCopy ) + sizeof( char );
            pBinding->dwStatus = DBSTATUS_S_OK;
            break;

        case TYPE_SLONG:
            *(ULONG*) pBinding->bData = atol( pvCopy );
            pBinding->dwLength = 4;
            pBinding->dwStatus = DBSTATUS_S_OK;
            break;

        default:
            // Nothing was stored, so do not report success
            assert( !"Unknown Data Type" );
            return ResultFromScode( E_FAIL );
    }

    return ResultFromScode( S_OK );
}


//--------------------------------------------------------------------
// @mfunc Given a pointer to the new data and the row the data is for,
// write the data to the file.
//
// The new record is built in m_pvInput (CHAR values in double quotes,
// SLONG values as decimal text, NULL values as empty fields, fields
// separated by commas, record ended by a newline) and appended to the
// end of the file. The old record is then deleted and the row's index
// entry is pointed at the appended record, so the row number stays the
// same.
//
// @rdesc HRESULT
//      @flag S_OK   | Record and Indexes updated
//      @flag E_FAIL | Invalid row or arguments, record does not fit the
//                     input buffer, or problems updating record
//
HRESULT CFileIO::UpdateRow
    (
    ULONG   ulRow,          //@parm IN | Row to update (1-based)
    ULONG*  ulOffset,       //@parm IN | Array of offsets for the columns
    BYTE*   pbProvRow       //@parm IN | Data to update row with.
    )
{
    LPSTR       pchOut;     // next free position in the record being built
    LPSTR       pchEnd;     // one past the end of the input buffer
    LPSTR       pszText;
    PCOLUMNDATA pColData;
    DWORD       i;
    int         cchText;
    LONG        lPos;

    if ((NULL == m_pvInput) || (NULL == ulOffset) || (NULL == pbProvRow))
        return ResultFromScode( E_FAIL );

    // Check the Row Number BEFORE anything is written. Row 0 is not a
    // data row and DeleteRow rejects it; accepting it here would append
    // a record and only then fail.
    if ((ulRow < 1) || (ulRow > m_cRows))
        return ResultFromScode( E_FAIL );

    pchOut = m_pvInput;
    pchEnd = m_pvInput + MAX_INPUT_BUFFER;

    // Build the new record one column at a time
    for (i = 1; i <= m_dwColumns; i++)
    {
        pColData = (PCOLUMNDATA) (pbProvRow + ulOffset[i]);

        // NULL Data is written as an empty field
        if (pColData->dwStatus != DBSTATUS_S_ISNULL)
        {
            switch (m_rgswColType[i])
            {
                case TYPE_CHAR:
                    pszText = (LPSTR) pColData->bData;
                    cchText = lstrlen( pszText );

                    // Room for the text and its two quotes
                    if ((pchEnd - pchOut) < (cchText + 2))
                        return ResultFromScode( E_FAIL );

                    *pchOut++ = '"';
                    memcpy( pchOut, pszText, cchText );
                    pchOut += cchText;
                    *pchOut++ = '"';
                    break;

                case TYPE_SLONG:
                    // Room for "-2147483648" plus wsprintf's terminator
                    if ((pchEnd - pchOut) < 12)
                        return ResultFromScode( E_FAIL );

                    // Read the whole 4-byte value (FillBinding stores it
                    // that way), not just the first byte of bData.
                    pchOut += wsprintf( pchOut, "%ld",
                                        *(LONG*) pColData->bData );
                    break;

                default:
                    assert( !"Unknown Data Type" );
                    break;
            }
        }

        // Room for the separator and the final null terminator
        if ((pchEnd - pchOut) < 2)
            return ResultFromScode( E_FAIL );

        // Write the separator directly at the append position. Appending
        // with a string-concatenation call would search for a terminator
        // that a NULL column never wrote, picking up stale buffer data.
        *pchOut++ = (i == m_dwColumns) ? '\n' : ',';
    }
    *pchOut = '\0';

    // Write Stream to File: updated rows are added to the end of the
    // file. Clear the state first so a stale eof/fail state cannot
    // suppress the seek.
    clear();
    seekg( 0, ios::end );
    lPos = tellg();
    if (lPos < 0)
        return ResultFromScode( E_FAIL );

    write( m_pvInput, (int) (pchOut - m_pvInput));
    if (bad())
        return ResultFromScode( E_FAIL );
    flush();

    // Delete old Row and fix up Row offset value
    if (FAILED( DeleteRow( ulRow )))
        return ResultFromScode( E_FAIL );

    m_FileIdx.SetIndex( ulRow, lPos );

    return ResultFromScode( S_OK );
}