MSHTML を用いて HTML ファイルを解析し、Title・Keyword・description の各タグと、リンク、イメージファイル名、フレームを表示します。

動作確認環境

実行ファイル・ソースファイルのダウンロード

ソース

下記以外のファイルは、新規プロジェクトで作成したファイルから変更しておりません。プロジェクトは、CLR・Windowsフォームアプリケーションを選択して作成します。

Form1.h

#pragma once


namespace htmlparse4 {

	using namespace System;
	using namespace System::ComponentModel;
	using namespace System::Collections;
	using namespace System::Windows::Forms;
	using namespace System::Data;
	using namespace System::Drawing;

	/// 
	/// Form1 の概要
	///
	/// 警告: このクラスの名前を変更する場合、このクラスが依存するすべての .resx ファイルに関連付けられた
	///          マネージ リソース コンパイラ ツールに対して 'Resource File Name' プロパティを
	///          変更する必要があります。この変更を行わないと、
	///          デザイナと、このフォームに関連付けられたローカライズ済みリソースとが、
	///          正しく相互に利用できなくなります。
	/// 
	public ref class Form1 : public System::Windows::Forms::Form
	{
	public:
		Form1(void)
		{
			InitializeComponent();
			//
			//TODO: ここにコンストラクタ コードを追加します
			//
		}

	protected:
		/// <summary>
		/// Cleans up the resources in use: deletes the designer component
		/// container when one exists.
		/// </summary>
		~Form1()
		{
			// Nothing to release when no container was ever created.
			if (components == nullptr)
			{
				return;
			}
			delete components;
		}
	// Input box holding the URL (or local file path) that read_button_Click navigates to.
	private: System::Windows::Forms::TextBox^  url_TextBox;
	protected: 

	protected: 
	// Starts loading the page named in url_TextBox.
	private: System::Windows::Forms::Button^  read_button;
	// Closes the form.
	private: System::Windows::Forms::Button^  exit_button;
	// Receives the document title(s) appended by webparse.
	private: System::Windows::Forms::TextBox^  title_textBox;
	// Caption "url".
	private: System::Windows::Forms::Label^  label1;
	// Caption "Title".
	private: System::Windows::Forms::Label^  label2;
	// Opens a file dialog to pick a local HTML file.
	private: System::Windows::Forms::Button^  ref_button;
	// Embedded browser that loads the page; its DocumentCompleted event drives the parsing.
	private: System::Windows::Forms::WebBrowser^  webBrowser1;
	// Receives the content of <meta name="keywords">.
	private: System::Windows::Forms::TextBox^  keyword_TextBox;
	// Caption "Keyword".
	private: System::Windows::Forms::Label^  label3;
	// Caption "description".
	private: System::Windows::Forms::Label^  label4;
	// Receives the content of <meta name="description">.
	private: System::Windows::Forms::TextBox^  desc_TextBox;
	// Receives the href attribute of every <a> element.
	private: System::Windows::Forms::TextBox^  ahref_TextBox;

	// Caption "a href".
	private: System::Windows::Forms::Label^  label5;
	// Receives the src attribute of every <img> element.
	private: System::Windows::Forms::TextBox^  img_src_TextBox;

	// Caption "img src".
	private: System::Windows::Forms::Label^  label6;
	// Receives the URLs of frame documents reported by webReadAfter.
	private: System::Windows::Forms::TextBox^  frame_TextBox;
	// Caption "frame".
	private: System::Windows::Forms::Label^  label7;

	// Reloads the page currently shown in webBrowser1.
	private: System::Windows::Forms::Button^  Refresh_Button;


	private:
		/// <summary>
		/// Required designer variable.
		/// </summary>
		System::ComponentModel::Container ^components;

#pragma region Windows Form Designer generated code
		/// <summary>
		/// Required method for Designer support - do not modify the contents
		/// of this method with the code editor.
		/// Creates every control of the form, sets its position, size, tab order
		/// and caption, wires the event handlers, and adds the controls to the form.
		/// </summary>
		void InitializeComponent(void)
		{
			this->url_TextBox = (gcnew System::Windows::Forms::TextBox());
			this->read_button = (gcnew System::Windows::Forms::Button());
			this->exit_button = (gcnew System::Windows::Forms::Button());
			this->title_textBox = (gcnew System::Windows::Forms::TextBox());
			this->label1 = (gcnew System::Windows::Forms::Label());
			this->label2 = (gcnew System::Windows::Forms::Label());
			this->ref_button = (gcnew System::Windows::Forms::Button());
			this->webBrowser1 = (gcnew System::Windows::Forms::WebBrowser());
			this->keyword_TextBox = (gcnew System::Windows::Forms::TextBox());
			this->label3 = (gcnew System::Windows::Forms::Label());
			this->label4 = (gcnew System::Windows::Forms::Label());
			this->desc_TextBox = (gcnew System::Windows::Forms::TextBox());
			this->ahref_TextBox = (gcnew System::Windows::Forms::TextBox());
			this->label5 = (gcnew System::Windows::Forms::Label());
			this->img_src_TextBox = (gcnew System::Windows::Forms::TextBox());
			this->label6 = (gcnew System::Windows::Forms::Label());
			this->frame_TextBox = (gcnew System::Windows::Forms::TextBox());
			this->label7 = (gcnew System::Windows::Forms::Label());
			this->Refresh_Button = (gcnew System::Windows::Forms::Button());
			this->SuspendLayout();
			// 
			// url_TextBox
			// 
			this->url_TextBox->Location = System::Drawing::Point(87, 6);
			this->url_TextBox->Multiline = true;
			this->url_TextBox->Name = L"url_TextBox";
			this->url_TextBox->Size = System::Drawing::Size(520, 19);
			this->url_TextBox->TabIndex = 0;
			this->url_TextBox->Text = L"http://www.google.co.jp/index.html";
			// 
			// read_button
			// 
			this->read_button->Location = System::Drawing::Point(14, 518);
			this->read_button->Name = L"read_button";
			this->read_button->Size = System::Drawing::Size(106, 29);
			this->read_button->TabIndex = 1;
			this->read_button->Text = L"読み出し(&O)";
			this->read_button->UseVisualStyleBackColor = true;
			this->read_button->Click += gcnew System::EventHandler(this, &Form1::read_button_Click);
			// 
			// exit_button
			// 
			this->exit_button->Location = System::Drawing::Point(598, 519);
			this->exit_button->Name = L"exit_button";
			this->exit_button->Size = System::Drawing::Size(104, 28);
			this->exit_button->TabIndex = 2;
			this->exit_button->Text = L"終了(&X)";
			this->exit_button->UseVisualStyleBackColor = true;
			this->exit_button->Click += gcnew System::EventHandler(this, &Form1::exit_button_Click);
			// 
			// title_textBox
			// 
			this->title_textBox->Location = System::Drawing::Point(86, 28);
			this->title_textBox->Multiline = true;
			this->title_textBox->Name = L"title_textBox";
			this->title_textBox->Size = System::Drawing::Size(520, 25);
			this->title_textBox->TabIndex = 3;
			// 
			// label1
			// 
			this->label1->AutoSize = true;
			this->label1->Location = System::Drawing::Point(12, 9);
			this->label1->Name = L"label1";
			this->label1->Size = System::Drawing::Size(18, 12);
			this->label1->TabIndex = 4;
			this->label1->Text = L"url";
			// 
			// label2
			// 
			this->label2->AutoSize = true;
			this->label2->Location = System::Drawing::Point(12, 33);
			this->label2->Name = L"label2";
			this->label2->Size = System::Drawing::Size(28, 12);
			this->label2->TabIndex = 5;
			this->label2->Text = L"Title";
			// 
			// ref_button
			// 
			this->ref_button->Location = System::Drawing::Point(624, 1);
			this->ref_button->Name = L"ref_button";
			this->ref_button->Size = System::Drawing::Size(61, 29);
			this->ref_button->TabIndex = 6;
			this->ref_button->Text = L"参照(&B)";
			this->ref_button->UseVisualStyleBackColor = true;
			this->ref_button->Click += gcnew System::EventHandler(this, &Form1::ref_button_Click);
			// 
			// webBrowser1
			// 
			this->webBrowser1->Location = System::Drawing::Point(84, 369);
			this->webBrowser1->MinimumSize = System::Drawing::Size(20, 20);
			this->webBrowser1->Name = L"webBrowser1";
			this->webBrowser1->Size = System::Drawing::Size(519, 128);
			this->webBrowser1->TabIndex = 7;
			// Parsing is driven by webReadAfter, which runs each time a document finishes loading.
			this->webBrowser1->DocumentCompleted += gcnew System::Windows::Forms::WebBrowserDocumentCompletedEventHandler(this, &Form1::webReadAfter);
			// 
			// keyword_TextBox
			// 
			this->keyword_TextBox->Location = System::Drawing::Point(86, 59);
			this->keyword_TextBox->Multiline = true;
			this->keyword_TextBox->Name = L"keyword_TextBox";
			this->keyword_TextBox->Size = System::Drawing::Size(521, 24);
			this->keyword_TextBox->TabIndex = 8;
			// 
			// label3
			// 
			this->label3->AutoSize = true;
			this->label3->Location = System::Drawing::Point(12, 62);
			this->label3->Name = L"label3";
			this->label3->Size = System::Drawing::Size(48, 12);
			this->label3->TabIndex = 9;
			this->label3->Text = L"Keyword";
			// 
			// label4
			// 
			this->label4->AutoSize = true;
			this->label4->Location = System::Drawing::Point(12, 92);
			this->label4->Name = L"label4";
			this->label4->Size = System::Drawing::Size(61, 12);
			this->label4->TabIndex = 10;
			this->label4->Text = L"description";
			// 
			// desc_TextBox
			// 
			this->desc_TextBox->Location = System::Drawing::Point(86, 89);
			this->desc_TextBox->Multiline = true;
			this->desc_TextBox->Name = L"desc_TextBox";
			this->desc_TextBox->Size = System::Drawing::Size(521, 23);
			this->desc_TextBox->TabIndex = 11;
			// 
			// ahref_TextBox
			// 
			this->ahref_TextBox->Location = System::Drawing::Point(86, 122);
			this->ahref_TextBox->Multiline = true;
			this->ahref_TextBox->Name = L"ahref_TextBox";
			this->ahref_TextBox->ScrollBars = System::Windows::Forms::ScrollBars::Both;
			this->ahref_TextBox->Size = System::Drawing::Size(517, 63);
			this->ahref_TextBox->TabIndex = 12;
			// 
			// label5
			// 
			this->label5->AutoSize = true;
			this->label5->Location = System::Drawing::Point(12, 125);
			this->label5->Name = L"label5";
			this->label5->Size = System::Drawing::Size(35, 12);
			this->label5->TabIndex = 13;
			this->label5->Text = L"a href";
			// 
			// img_src_TextBox
			// 
			this->img_src_TextBox->Location = System::Drawing::Point(86, 194);
			this->img_src_TextBox->Multiline = true;
			this->img_src_TextBox->Name = L"img_src_TextBox";
			this->img_src_TextBox->ScrollBars = System::Windows::Forms::ScrollBars::Both;
			this->img_src_TextBox->Size = System::Drawing::Size(516, 74);
			this->img_src_TextBox->TabIndex = 14;
			// 
			// label6
			// 
			this->label6->AutoSize = true;
			this->label6->Location = System::Drawing::Point(17, 197);
			this->label6->Name = L"label6";
			this->label6->Size = System::Drawing::Size(43, 12);
			this->label6->TabIndex = 15;
			this->label6->Text = L"img src";
			// 
			// frame_TextBox
			// 
			this->frame_TextBox->Location = System::Drawing::Point(86, 274);
			this->frame_TextBox->Multiline = true;
			this->frame_TextBox->Name = L"frame_TextBox";
			this->frame_TextBox->Size = System::Drawing::Size(515, 62);
			this->frame_TextBox->TabIndex = 16;
			// 
			// label7
			// 
			this->label7->AutoSize = true;
			this->label7->Location = System::Drawing::Point(23, 275);
			this->label7->Name = L"label7";
			this->label7->Size = System::Drawing::Size(34, 12);
			this->label7->TabIndex = 17;
			this->label7->Text = L"frame";
			// 
			// Refresh_Button
			// 
			this->Refresh_Button->Location = System::Drawing::Point(164, 519);
			this->Refresh_Button->Name = L"Refresh_Button";
			this->Refresh_Button->Size = System::Drawing::Size(94, 28);
			this->Refresh_Button->TabIndex = 19;
			this->Refresh_Button->Text = L"再読み込み";
			this->Refresh_Button->UseVisualStyleBackColor = true;
			this->Refresh_Button->Click += gcnew System::EventHandler(this, &Form1::Refresh_Button_Click);
			// 
			// Form1
			// 
			this->AutoScaleDimensions = System::Drawing::SizeF(6, 12);
			this->AutoScaleMode = System::Windows::Forms::AutoScaleMode::Font;
			this->ClientSize = System::Drawing::Size(734, 558);
			this->Controls->Add(this->Refresh_Button);
			this->Controls->Add(this->label7);
			this->Controls->Add(this->frame_TextBox);
			this->Controls->Add(this->label6);
			this->Controls->Add(this->img_src_TextBox);
			this->Controls->Add(this->label5);
			this->Controls->Add(this->ahref_TextBox);
			this->Controls->Add(this->desc_TextBox);
			this->Controls->Add(this->label4);
			this->Controls->Add(this->label3);
			this->Controls->Add(this->keyword_TextBox);
			this->Controls->Add(this->webBrowser1);
			this->Controls->Add(this->ref_button);
			this->Controls->Add(this->label2);
			this->Controls->Add(this->label1);
			this->Controls->Add(this->title_textBox);
			this->Controls->Add(this->exit_button);
			this->Controls->Add(this->read_button);
			this->Controls->Add(this->url_TextBox);
			this->Name = L"Form1";
			this->Text = L"HtmlParse";
			this->Load += gcnew System::EventHandler(this, &Form1::Form1_Load);
			this->ResumeLayout(false);
			this->PerformLayout();

		}
#pragma endregion
	/// <summary>
	/// Extracts information from one HTML document and appends it to the result
	/// text boxes: the title, the content of the "keywords" and "description"
	/// meta tags, the href of every &lt;a&gt; element and the src of every
	/// &lt;img&gt; element. Each value is written on its own line.
	/// </summary>
	/// <param name="doc">Document to inspect; a null handle is ignored.</param>
	private: System::Void webparse(HtmlDocument^ doc){
			// A frame may not expose a document; there is nothing to parse then.
			if(doc==nullptr){
				return;
			}

			// A multiline TextBox only breaks lines on CR+LF, so use
			// Environment::NewLine instead of a bare "\n".
			String^ newLine = Environment::NewLine;

			title_textBox->AppendText( doc->Title + newLine);

			// <meta name="keywords"> / <meta name="description">.
			// The name is compared case-insensitively because pages commonly
			// write it as "Keywords" or "DESCRIPTION".
			for each(HtmlElement^ meta in doc->GetElementsByTagName("meta")){
				String^ name = meta->GetAttribute("name");
				if( String::Equals(name, "keywords", StringComparison::OrdinalIgnoreCase)){
					keyword_TextBox->AppendText( meta->GetAttribute("content") + newLine);
				}else if( String::Equals(name, "description", StringComparison::OrdinalIgnoreCase)){
					desc_TextBox->AppendText( meta->GetAttribute("content") + newLine);
				}
			}

			// Link targets.
			for each(HtmlElement^ anchor in doc->GetElementsByTagName("a")){
				ahref_TextBox->AppendText( anchor->GetAttribute("href") + newLine);
			}

			// Image file names.
			for each(HtmlElement^ image in doc->GetElementsByTagName("img")){
				img_src_TextBox->AppendText( image->GetAttribute("src") + newLine);
			}
	}
	/// <summary>
	/// DocumentCompleted handler of webBrowser1.
	/// When the completed URL differs from the browser's own URL the event
	/// belongs to a frame: its URL is appended to frame_TextBox and nothing
	/// else happens. When the URLs match, the top-level document and then the
	/// document of every frame it contains are passed to webparse.
	/// </summary>
	/// <param name="sender">Event source (unused).</param>
	/// <param name="e">Event data; e->Url is the URL of the document that finished loading.</param>
	private: System::Void webReadAfter(System::Object^ sender,System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^ e){

		if(e->Url==webBrowser1->Url)
			 MessageBox::Show("読み込み終了" );
		else{
			 MessageBox::Show("フレーム" );
			// A multiline TextBox only breaks lines on CR+LF.
			frame_TextBox->AppendText(e->Url->ToString() + Environment::NewLine);
			return;
		}

		HtmlDocument^ doc = webBrowser1->Document;
		if(doc==nullptr){
			return;
		}
		webparse(doc);

		// Walk the frames of the top-level window and parse each frame's document.
		HtmlWindow^ topWindow = doc->Window;
		if(topWindow==nullptr || topWindow->Frames==nullptr){
			return;
		}
		int count = topWindow->Frames->Count;       // number of contained frames
		for(int n=0;n<count;n++){
			HtmlWindow^ win0 = topWindow->Frames[n];     // n-th frame window
			// NOTE(review): reading the document of a frame from another domain
			// may be refused by the browser control - confirm whether such
			// frames need to be skipped explicitly.
			HtmlDocument^ doc0 = win0->Document;         // document of the n-th frame
			if(doc0!=nullptr){
				webparse(doc0);
			}
		}

		 }
	/// <summary>
	/// Click handler of the exit button: closes the form.
	/// </summary>
	private: System::Void exit_button_Click(System::Object^  sender, System::EventArgs^  e)
	{
		this->Close();
	}
			  

/// <summary>
/// Click handler of the read button: clears every result text box, resets the
/// browser's current document, and navigates the browser to the URL typed
/// into url_TextBox. Parsing happens later, in the DocumentCompleted handler.
/// </summary>
private: System::Void read_button_Click(System::Object^  sender, System::EventArgs^  e) {
			String^ my_url = this->url_TextBox->Text;

			// Results are appended while parsing, so every output box must be
			// emptied first. The title box is included: otherwise titles from
			// earlier loads pile up.
			title_textBox->Text="";
			keyword_TextBox->Text="";
			desc_TextBox->Text="";
			ahref_TextBox->Text="";
			img_src_TextBox->Text="";
			frame_TextBox->Text="";

			// Discard the previously loaded document before starting the new load.
			if(this->webBrowser1->Document!=nullptr){
				this->webBrowser1->Document->OpenNew(true);
			}
			webBrowser1->Navigate(my_url) ;
		 }

/// <summary>
/// Click handler of the browse button: lets the user pick a local HTML file
/// and, when the dialog is confirmed, copies the chosen path into url_TextBox.
/// </summary>
private: System::Void ref_button_Click(System::Object^  sender, System::EventArgs^  e) {
			  // Stack semantics: the dialog is disposed automatically when it
			  // goes out of scope, instead of waiting for the finalizer.
			  OpenFileDialog openFileDialog1;
			  openFileDialog1.Filter = "HTMLファイル(*.html;*.htm)|*.html;*.htm|すべてのファイル(*.*)|*.*";
			  openFileDialog1.Title = "HTMLファイル名を選択";

			  // DialogResult is fully qualified because Form has a property of the same name.
			  if (openFileDialog1.ShowDialog() == System::Windows::Forms::DialogResult::OK) {
					this->url_TextBox->Text=openFileDialog1.FileName;
			  }
		 }
// Empty handler with the DocumentCompleted signature. InitializeComponent
// subscribes webReadAfter (not this method) to webBrowser1->DocumentCompleted,
// so nothing in the visible source attaches it to an event.
private: System::Void webBrowser1_DocumentCompleted(System::Object^  sender, System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^  e) {
		 }
/// <summary>
/// Click handler of the reload button: asks the embedded browser to reload
/// the page it is currently showing.
/// </summary>
private: System::Void Refresh_Button_Click(System::Object^  sender, System::EventArgs^  e)
{
	webBrowser1->Refresh();
}
// Load handler of the form (subscribed in InitializeComponent).
// Intentionally empty: no work is done when the form loads.
private: System::Void Form1_Load(System::Object^  sender, System::EventArgs^  e) {
		 }
};
}


htmlparse4.cpp

// htmlparse4.cpp : メイン プロジェクト ファイルです。

#include "stdafx.h"
#include "Form1.h"

using namespace htmlparse4;

// Application entry point. Runs on a single-threaded apartment thread,
// enables visual styles, and shows Form1 as the main window until it closes.
// args: command-line arguments (unused).
[STAThreadAttribute]
int main(array<System::String ^> ^args)
{
	// Enable Windows XP visual styles before any control is created
	Application::EnableVisualStyles();
	Application::SetCompatibleTextRenderingDefault(false);

	// Create the main window and run the message loop
	Application::Run(gcnew Form1());
	return 0;
}