Main (Form1.frm)
VERSION 5.00
Object = "{EAB22AC0-30C1-11CF-A7EB-0000C05BAE0B}#1.1#0"; "shdocvw.dll"
Object = "{831FDD16-0C5C-11D2-A9FC-0000F8754DA1}#2.0#0"; "MSCOMCTL.OCX"
Object = "{F9043C88-F6F2-101A-A3C9-08002B2F49FB}#1.2#0"; "comdlg32.ocx"
Object = "{298D967A-4550-11D1-957C-444553540000}#7.0#0"; "CJSplit32.OCX"
Begin VB.Form Main
Caption = "HTML Data Refinery"
ClientHeight = 7485
ClientLeft = 165
ClientTop = 735
ClientWidth = 9390
LinkTopic = "Form1"
ScaleHeight = 7485
ScaleWidth = 9390
StartUpPosition = 3 'Windows Default
Begin VB.Timer Timer1
Left = 8880
Top = 0
End
Begin MSComctlLib.StatusBar StatusBar1
Align = 2 'Align Bottom
Height = 255
Left = 0
TabIndex = 0
Top = 7230
Width = 9390
_ExtentX = 16563
_ExtentY = 450
_Version = 393216
BeginProperty Panels {8E3867A5-8586-11D1-B16A-00C0F0283628}
NumPanels = 5
BeginProperty Panel1 {8E3867AB-8586-11D1-B16A-00C0F0283628}
AutoSize = 1
Object.Width = 9604
MinWidth = 5080
EndProperty
BeginProperty Panel2 {8E3867AB-8586-11D1-B16A-00C0F0283628}
Style = 4
Alignment = 2
Enabled = 0 'False
Object.Width = 1270
MinWidth = 1270
TextSave = "SCRL"
EndProperty
BeginProperty Panel3 {8E3867AB-8586-11D1-B16A-00C0F0283628}
Style = 1
Alignment = 2
Enabled = 0 'False
Object.Width = 1270
MinWidth = 1270
TextSave = "CAPS"
EndProperty
BeginProperty Panel4 {8E3867AB-8586-11D1-B16A-00C0F0283628}
Style = 2
Alignment = 2
Enabled = 0 'False
Object.Width = 1270
MinWidth = 1270
TextSave = "NUM"
EndProperty
BeginProperty Panel5 {8E3867AB-8586-11D1-B16A-00C0F0283628}
Style = 5
Alignment = 2
TextSave = "4:28 PM"
EndProperty
EndProperty
End
Begin MSComctlLib.ImageList ImageList1
Left = 8760
Top = 1080
_ExtentX = 1005
_ExtentY = 1005
BackColor = -2147483643
MaskColor = 12632256
_Version = 393216
End
Begin MSComDlg.CommonDialog Dialogs1
Left = 8880
Top = 480
_ExtentX = 847
_ExtentY = 847
_Version = 393216
End
Begin CreativeSplitterProj.CreativeSplitter CreativeSplitter1
Height = 7215
Left = 0
TabIndex = 1
Top = 0
Width = 8655
_ExtentX = 15266
_ExtentY = 12726
FirstControl = "TreeView1"
SecondControl = "TabStrip1"
SplitPosition = 2753
Begin VB.TextBox Text1
Height = 3735
Left = 5040
Locked = -1 'True
MultiLine = -1 'True
ScrollBars = 3 'Both
TabIndex = 5
Top = 2040
Visible = 0 'False
Width = 3015
End
Begin SHDocVwCtl.WebBrowser WebBrowser1
Height = 3615
Left = 3480
TabIndex = 4
Top = 960
Width = 3015
ExtentX = 5318
ExtentY = 6376
ViewMode = 0
Offline = 0
Silent = 0
RegisterAsBrowser= 0
RegisterAsDropTarget= 1
AutoArrange = 0 'False
NoClientEdge = 0 'False
AlignLeft = 0 'False
ViewID = "{0057D0E0-3573-11CF-AE69-08002B2E1262}"
Location = "res://E:\WINNT\system32\shdoclc.dll/dnserror.htm#http:///"
End
Begin MSComctlLib.TabStrip TabStrip1
Height = 7215
Left = 2813
TabIndex = 3
Top = 0
Width = 5842
_ExtentX = 10292
_ExtentY = 12726
_Version = 393216
BeginProperty Tabs {1EFB6598-857C-11D1-B16A-00C0F0283628}
NumTabs = 2
BeginProperty Tab1 {1EFB659A-857C-11D1-B16A-00C0F0283628}
Caption = "HTML"
Object.ToolTipText = "HTML Page View"
ImageVarType = 2
EndProperty
BeginProperty Tab2 {1EFB659A-857C-11D1-B16A-00C0F0283628}
Caption = "Data"
Object.ToolTipText = "Data Extraction"
ImageVarType = 2
EndProperty
EndProperty
End
Begin MSComctlLib.TreeView TreeView1
Height = 7215
Left = 0
TabIndex = 2
Top = 0
Width = 2753
_ExtentX = 4868
_ExtentY = 12726
_Version = 393217
Indentation = 441
LabelEdit = 1
Style = 6
Appearance = 1
End
End
Begin VB.Menu mnuFile
Caption = "&File"
Begin VB.Menu mnuFileOpen
Caption = "&Open HTML..."
Shortcut = ^O
End
Begin VB.Menu mnuFileSave
Caption = "&Save HTML As..."
Shortcut = ^S
End
Begin VB.Menu mnuSep
Caption = "-"
End
Begin VB.Menu mnuFileExit
Caption = "E&xit"
End
End
Begin VB.Menu mnuConvert
Caption = "&Convert"
Begin VB.Menu mnuConvertURL
Caption = "&URL..."
Shortcut = ^U
End
Begin VB.Menu mnuConvertBuild
Caption = "&Build All Projects"
End
Begin VB.Menu mnuSep2
Caption = "-"
End
Begin VB.Menu mnuOptions
Caption = "&Options..."
End
End
Begin VB.Menu mnuHelp
Caption = "&Help"
Begin VB.Menu mnuHelpAbout
Caption = "&About"
End
End
Begin VB.Menu mnuPropertiesPopup
Caption = "PopupProperties"
Visible = 0 'False
Begin VB.Menu mnuProperties
Caption = "&Properties"
End
Begin VB.Menu mnuExpandAll
Caption = "Expand/Collapse &All"
End
End
End
Attribute VB_Name = "Main"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = False
Attribute VB_PredeclaredId = True
Attribute VB_Exposed = False
Option Explicit
Private m_aFiles() As String
Private m_nCurrentFile As Integer
Private Sub Form_Load()
Set g_Options = New Options
g_Options.LoadDefaults
Timer1.Interval = 750
ReDim m_aFiles(1)
m_aFiles(1) = g_Options.GetRecentURL(1)
Call BeginProcessing
End Sub
Private Sub Form_QueryUnload(Cancel As Integer, UnloadMode As Integer)
Dim bYes As Boolean
'bYes = (MsgBox("Unload OK?", vbYesNo, "Unload Me") = vbYes)
'If Not bYes Then Cancel = True
End Sub
Private Sub Form_Unload(Cancel As Integer)
Call g_Options.SaveDefaults
End Sub
Private Sub Form_Resize()
If Me.ScaleHeight > 255 Then
CreativeSplitter1.Move Me.ScaleLeft, Me.ScaleTop, _
Me.ScaleWidth, Me.ScaleHeight - StatusBar1.height
Call CreativeSplitter1_Resize
End If
End Sub
Private Sub CreativeSplitter1_Resize()
WebBrowser1.Move TabStrip1.ClientLeft, TabStrip1.ClientTop, _
TabStrip1.ClientWidth, TabStrip1.ClientHeight
Text1.Move TabStrip1.ClientLeft, TabStrip1.ClientTop, _
TabStrip1.ClientWidth, TabStrip1.ClientHeight
End Sub
Private Sub CreativeSplitter1_Paint()
Call CreativeSplitter1_Resize
End Sub
Private Sub mnuFileExit_Click()
Unload Me
End Sub
Private Sub mnuFileOpen_Click()
On Error GoTo ErrHandler
Dialogs1.CancelError = True
Dialogs1.DialogTitle = "Open Document"
Dialogs1.Filename = ""
Dialogs1.Filter = "HTML Files (*.htm,*.html)|*.htm;*.html|All Files (*.*)|*.*"
Dialogs1.Flags = cdlOFNFileMustExist + cdlOFNPathMustExist + cdlOFNAllowMultiselect + cdlOFNExplorer
Dialogs1.DefaultExt = "*.htm"
OpenAgain:
Dialogs1.ShowOpen
' Capture multiple selection in array
Call BuildFileList(Dialogs1.Filename)
If UBound(m_aFiles) > 1 Then
If Not (vbYes = MsgBox("You have selected more than one file, but only one file may be examined at a time." & vbCrLf & _
"Click Yes to process all files (saving the results as XML), or No to reconsider.", vbYesNo + vbQuestion)) Then
GoTo ErrHandler
End If
End If
Call BeginProcessing
Exit Sub
ErrHandler:
If Err = 20476 Then
MsgBox "Too many files selected. Please try again with fewer files.", vbCritical, "Too many files"
GoTo OpenAgain
End If
UpdateUser "Open canceled."
Err.Clear
End Sub
Private Sub mnuConvertBuild_Click()
Dim bitem As BuildListItem, i&
For i = 1 To g_Options.ProjectCount
Set bitem = g_Options.GetProject(i)
If bitem.Enabled Then
If Not bitem.Process Then Exit For
End If
Next i
End Sub
Private Sub mnuFileSave_Click()
On Error GoTo ErrHandler
Dim f As Integer
Dialogs1.CancelError = True
Dialogs1.DialogTitle = "Save Document"
Dialogs1.Filter = "HTML Files (*.htm,*.html)|*.htm;*.html|XML Files (*.xml)|*.xml|All Files (*.*)|*.*"
Dialogs1.Flags = cdlOFNOverwritePrompt + cdlOFNPathMustExist + cdlOFNNoReadOnlyReturn
Dialogs1.DefaultExt = "*.htm"
Dialogs1.ShowSave
TabStrip1.Tabs(2).Selected = True
f = FreeFile
Open Dialogs1.Filename For Output Lock Read Write As #f
Print #f, Text1.text
Close #f
Exit Sub
ErrHandler:
MsgBox "Save canceled."
Err.Clear
End Sub
Private Sub mnuConvertURL_Click()
CenterForm LoadURL
LoadURL.Show vbModal
If Not LoadURL.m_bCancel Then
ReDim m_aFiles(1)
m_aFiles(1) = g_Options.GetRecentURL(1)
m_nCurrentFile = 0
Call BeginProcessing
End If
End Sub
Private Sub mnuHelpAbout_Click()
CenterForm About
About.Show vbModal
End Sub
Private Sub mnuOptions_Click()
CenterForm Settings
Settings.Show vbModal
End Sub
Private Sub TabStrip1_Click()
Select Case TabStrip1.SelectedItem.Index
Case 1:
Text1.Visible = False
WebBrowser1.Visible = True
Case 2:
WebBrowser1.Visible = False
Text1.Visible = True
Call ParseText
End Select
End Sub
Private Sub TreeView1_KeyPress(KeyAscii As Integer)
If KeyAscii = Asc(vbCr) Then TreeView1_DblClick
End Sub
Private Sub TreeView1_MouseDown(Button As Integer, Shift As Integer, x As Single, y As Single)
Dim nd As MSComctlLib.Node
If Button = vbRightButton Then
Set nd = TreeView1.SelectedItem
mnuExpandAll.Enabled = IIf(nd.children > 0, True, False)
PopupMenu mnuPropertiesPopup
End If
End Sub
Private Sub TreeView1_DblClick()
Dim nd As MSComctlLib.Node
Set nd = TreeView1.SelectedItem
If nd.children = 0 Then mnuProperties_Click
End Sub
Private Sub mnuProperties_Click()
Dim nd As MSComctlLib.Node
Set nd = TreeView1.SelectedItem
Set Properties.Element = nd.tag
Properties.Show vbModal
End Sub
Private Sub mnuExpandAll_Click()
Dim nd As MSComctlLib.Node
Set nd = TreeView1.SelectedItem
nd.Expanded = Not nd.Expanded
ExpandAll nd, nd.Expanded
End Sub
Private Sub ExpandAll(ByVal parent As MSComctlLib.Node, ByVal exp As Boolean)
Dim nd As MSComctlLib.Node
Set nd = parent.Child
Do Until nd Is Nothing
nd.Expanded = exp
ExpandAll nd, exp
Set nd = nd.Next
Loop
End Sub
Private Sub Timer1_Timer()
' On Error Resume Next
Dim hFile As Integer
Dim szString1, szString2 As String
Dim oTemp1, oTemp2 As Object
Dim fs As Object
Dim Count As Long
Select Case WebBrowser1.ReadyState
Case READYSTATE_UNINITIALIZED:
'If we are on an empty IE (just initialized, then jump to loadnextpage
WebBrowser1.Navigate "http://www.microsoft.com/ms.htm"
UpdateUser "Begin loading..."
Exit Sub
Case READYSTATE_INTERACTIVE, READYSTATE_LOADING:
UpdateUser "Still loading..."
Exit Sub
Case READYSTATE_COMPLETE, READYSTATE_LOADED:
'If not finished then wait till next loop.
UpdateUser "Loaded: " & g_Options.GetRecentURL(1)
End Select
Timer1.Enabled = False
If UBound(m_aFiles) > 1 Then TabStrip1.Tabs(2).Selected = True
If m_nCurrentFile = UBound(m_aFiles) Then Call LoadTree
End Sub
Private Sub ParseText()
If Text1.text = "" Then
Set Convert.m_rootElem = WebBrowser1.Document.documentElement
CenterForm Convert
Convert.Show vbModal
Text1.text = Convert.m_strHTML
End If
' Complete the loop, if other files are waiting
If UBound(m_aFiles) > 1 Then
Call SaveXMLFile
Call BeginProcessing
End If
End Sub
Private Sub LoadTree()
Dim nd As MSComctlLib.Node
Dim item As Variant
Dim str As String
If Not WebBrowser1.Document Is Nothing Then
TreeView1.Nodes.Clear
For Each item In WebBrowser1.Document.All
If item.parentElement Is Nothing Then
Set nd = TreeView1.Nodes.Add
Else
Set nd = TreeView1.Nodes.Add(item.parentElement.uniqueID, tvwChild)
End If
Set nd.tag = item
nd.Key = item.uniqueID
nd.text = item.nodeName
Next item
End If
End Sub
Private Sub BeginProcessing()
Dim file$
If m_nCurrentFile < UBound(m_aFiles) Then
m_nCurrentFile = m_nCurrentFile + 1
file = m_aFiles(m_nCurrentFile)
g_Options.AddRecentURL file
TabStrip1.Tabs(1).Selected = True
Text1.text = ""
WebBrowser1.Navigate file
Timer1.Enabled = True
End If
End Sub
' Always saves the currently opened file with .XML extension
Private Sub SaveXMLFile()
Dim f&, file$, x&
x = InStrRev(m_aFiles(m_nCurrentFile), ".")
If x > 0 Then
file = Left(m_aFiles(m_nCurrentFile), x) & "xml"
Else
file = m_aFiles(m_nCurrentFile) & ".xml"
End If
' drop the "file://" extension
If Left(file, 7) = "file://" Then file = Mid(file, 8)
f = FreeFile
Open file For Output Lock Read Write As #f
Print #f, Text1.text
Close #f
End Sub
' Separates multiple selection filename into m_aFiles array
Private Sub BuildFileList(Filename As String)
Dim i&, x&, y&, path$
' Capture multiple selection in array
y = InStr(Filename, Chr(0))
If y > 0 Then
path = "file://" & Left(Filename, y - 1)
i = 1
x = y + 1
y = InStr(x, Filename, Chr(0))
Do Until y = 0
ReDim Preserve m_aFiles(i)
m_aFiles(i) = path & "\" & Mid(Filename, x, y - x)
i = i + 1
x = y + 1
y = InStr(x, Filename, Chr(0))
Loop
ReDim Preserve m_aFiles(i)
m_aFiles(i) = path & "\" & Mid(Filename, x)
Else ' only one file selected
ReDim m_aFiles(1)
m_aFiles(1) = "file://" & Filename
End If
' reset current file counter
m_nCurrentFile = 0
End Sub