MSHTMLを用いHTMLファイルを解析しTitle・Keyword・descriptionタグとリンク、イメージファイル名、フレームについて表示

icon 項目のみ表示/展開表示の切り替え

概要

MSHTML(WebBrowserコントロール)を用いてHTMLファイルを解析し、titleタグ、metaタグのkeywords・description、リンク(a href)、イメージファイル名(img src)、フレームのURLを表示します。

動作確認環境

実行ファイル・ソースファイルのダウンロード

ソース

下記以外のファイルは、新規プロジェクトで作成したファイルから変更しておりません。プロジェクトは、CLR・Windowsフォームアプリケーションを選択して作成します。

Form1.h

#pragma once


namespace htmlparse4 {

        using namespace System;
        using namespace System::ComponentModel;
        using namespace System::Collections;
        using namespace System::Windows::Forms;
        using namespace System::Data;
        using namespace System::Drawing;

        /// <summary>
        /// Form1 の概要
        ///
        /// 警告: このクラスの名前を変更する場合、このクラスが依存するすべての .resx ファイルに関連付けられた
        ///          マネージ リソース コンパイラ ツールに対して 'Resource File Name' プロパティを
        ///          変更する必要があります。この変更を行わないと、
        ///          デザイナと、このフォームに関連付けられたローカライズ済みリソースとが、
        ///          正しく相互に利用できなくなります。
        /// </summary>
        public ref class Form1 : public System::Windows::Forms::Form
        {
        public:
                Form1(void)
                {
                        InitializeComponent();
                        //
                        //TODO: ここにコンストラクタ コードを追加します
                        //
                }

        protected:
                /// <summary>
                /// Cleans up all resources being used.
                /// </summary>
                ~Form1()
                {
                        // The designer container is only deleted when it has been created.
                        if (components != nullptr)
                        {
                                delete components;
                        }
                }
        // Input box holding the URL (or local file path) passed to webBrowser1->Navigate.
        private: System::Windows::Forms::TextBox^  url_TextBox;
        protected: 

        protected: 
        // Starts loading the address entered in url_TextBox.
        private: System::Windows::Forms::Button^  read_button;
        // Closes the form.
        private: System::Windows::Forms::Button^  exit_button;
        // Receives the title of each parsed document.
        private: System::Windows::Forms::TextBox^  title_textBox;
        // Caption "url".
        private: System::Windows::Forms::Label^  label1;
        // Caption "Title".
        private: System::Windows::Forms::Label^  label2;
        // Opens a file dialog to pick a local HTML file.
        private: System::Windows::Forms::Button^  ref_button;
        // Browser control that loads the page; its Document is what gets parsed.
        private: System::Windows::Forms::WebBrowser^  webBrowser1;
        // Receives the content of the "keywords" meta tag.
        private: System::Windows::Forms::TextBox^  keyword_TextBox;
        // Caption "Keyword".
        private: System::Windows::Forms::Label^  label3;
        // Caption "description".
        private: System::Windows::Forms::Label^  label4;
        // Receives the content of the "description" meta tag.
        private: System::Windows::Forms::TextBox^  desc_TextBox;
        // Receives the href attribute of every a element.
        private: System::Windows::Forms::TextBox^  ahref_TextBox;

        // Caption "a href".
        private: System::Windows::Forms::Label^  label5;
        // Receives the src attribute of every img element.
        private: System::Windows::Forms::TextBox^  img_src_TextBox;

        // Caption "img src".
        private: System::Windows::Forms::Label^  label6;
        // Receives the URL reported by each frame's DocumentCompleted event.
        private: System::Windows::Forms::TextBox^  frame_TextBox;
        // Caption "frame".
        private: System::Windows::Forms::Label^  label7;

        // Reloads the page currently shown in webBrowser1.
        private: System::Windows::Forms::Button^  Refresh_Button;


        private:
                /// <summary>
                /// Required designer variable.
                /// </summary>
                System::ComponentModel::Container ^components;

#pragma region Windows Form Designer generated code
                /// <summary>
                /// デザイナ サポートに必要なメソッドです。このメソッドの内容を
                /// コード エディタで変更しないでください。
                /// </summary>
                void InitializeComponent(void)
                {
                        this->url_TextBox = (gcnew System::Windows::Forms::TextBox());
                        this->read_button = (gcnew System::Windows::Forms::Button());
                        this->exit_button = (gcnew System::Windows::Forms::Button());
                        this->title_textBox = (gcnew System::Windows::Forms::TextBox());
                        this->label1 = (gcnew System::Windows::Forms::Label());
                        this->label2 = (gcnew System::Windows::Forms::Label());
                        this->ref_button = (gcnew System::Windows::Forms::Button());
                        this->webBrowser1 = (gcnew System::Windows::Forms::WebBrowser());
                        this->keyword_TextBox = (gcnew System::Windows::Forms::TextBox());
                        this->label3 = (gcnew System::Windows::Forms::Label());
                        this->label4 = (gcnew System::Windows::Forms::Label());
                        this->desc_TextBox = (gcnew System::Windows::Forms::TextBox());
                        this->ahref_TextBox = (gcnew System::Windows::Forms::TextBox());
                        this->label5 = (gcnew System::Windows::Forms::Label());
                        this->img_src_TextBox = (gcnew System::Windows::Forms::TextBox());
                        this->label6 = (gcnew System::Windows::Forms::Label());
                        this->frame_TextBox = (gcnew System::Windows::Forms::TextBox());
                        this->label7 = (gcnew System::Windows::Forms::Label());
                        this->Refresh_Button = (gcnew System::Windows::Forms::Button());
                        this->SuspendLayout();
                        // 
                        // url_TextBox
                        // 
                        this->url_TextBox->Location = System::Drawing::Point(87, 6);
                        this->url_TextBox->Multiline = true;
                        this->url_TextBox->Name = L"url_TextBox";
                        this->url_TextBox->Size = System::Drawing::Size(520, 19);
                        this->url_TextBox->TabIndex = 0;
                        this->url_TextBox->Text = L"http://www.google.co.jp/index.html";
                        // 
                        // read_button
                        // 
                        this->read_button->Location = System::Drawing::Point(14, 518);
                        this->read_button->Name = L"read_button";
                        this->read_button->Size = System::Drawing::Size(106, 29);
                        this->read_button->TabIndex = 1;
                        this->read_button->Text = L"読み出し(&O)";
                        this->read_button->UseVisualStyleBackColor = true;
                        this->read_button->Click += gcnew System::EventHandler(this, &Form1::read_button_Click);
                        // 
                        // exit_button
                        // 
                        this->exit_button->Location = System::Drawing::Point(598, 519);
                        this->exit_button->Name = L"exit_button";
                        this->exit_button->Size = System::Drawing::Size(104, 28);
                        this->exit_button->TabIndex = 2;
                        this->exit_button->Text = L"終了(&X)";
                        this->exit_button->UseVisualStyleBackColor = true;
                        this->exit_button->Click += gcnew System::EventHandler(this, &Form1::exit_button_Click);
                        // 
                        // title_textBox
                        // 
                        this->title_TextBox->Location = System::Drawing::Point(86, 28);
                        this->title_TextBox->Multiline = true;
                        this->title_TextBox->Name = L"title_textBox";
                        this->title_TextBox->Size = System::Drawing::Size(520, 25);
                        this->title_TextBox->TabIndex = 3;
                        // 
                        // label1
                        // 
                        this->label1->AutoSize = true;
                        this->label1->Location = System::Drawing::Point(12, 9);
                        this->label1->Name = L"label1";
                        this->label1->Size = System::Drawing::Size(18, 12);
                        this->label1->TabIndex = 4;
                        this->label1->Text = L"url";
                        // 
                        // label2
                        // 
                        this->label2->AutoSize = true;
                        this->label2->Location = System::Drawing::Point(12, 33);
                        this->label2->Name = L"label2";
                        this->label2->Size = System::Drawing::Size(28, 12);
                        this->label2->TabIndex = 5;
                        this->label2->Text = L"Title";
                        // 
                        // ref_button
                        // 
                        this->ref_button->Location = System::Drawing::Point(624, 1);
                        this->ref_button->Name = L"ref_button";
                        this->ref_button->Size = System::Drawing::Size(61, 29);
                        this->ref_button->TabIndex = 6;
                        this->ref_button->Text = L"参照(&B)";
                        this->ref_button->UseVisualStyleBackColor = true;
                        this->ref_button->Click += gcnew System::EventHandler(this, &Form1::ref_button_Click);
                        // 
                        // webBrowser1
                        // 
                        this->webBrowser1->Location = System::Drawing::Point(84, 369);
                        this->webBrowser1->MinimumSize = System::Drawing::Size(20, 20);
                        this->webBrowser1->Name = L"webBrowser1";
                        this->webBrowser1->Size = System::Drawing::Size(519, 128);
                        this->webBrowser1->TabIndex = 7;
                        this->webBrowser1->DocumentCompleted += gcnew System::Windows::Forms::WebBrowserDocumentCompletedEventHandler(this, &Form1::webReadAfter);
                        // 
                        // keyword_TextBox
                        // 
                        this->keyword_TextBox->Location = System::Drawing::Point(86, 59);
                        this->keyword_TextBox->Multiline = true;
                        this->keyword_TextBox->Name = L"keyword_TextBox";
                        this->keyword_TextBox->Size = System::Drawing::Size(521, 24);
                        this->keyword_TextBox->TabIndex = 8;
                        // 
                        // label3
                        // 
                        this->label3->AutoSize = true;
                        this->label3->Location = System::Drawing::Point(12, 62);
                        this->label3->Name = L"label3";
                        this->label3->Size = System::Drawing::Size(48, 12);
                        this->label3->TabIndex = 9;
                        this->label3->Text = L"Keyword";
                        // 
                        // label4
                        // 
                        this->label4->AutoSize = true;
                        this->label4->Location = System::Drawing::Point(12, 92);
                        this->label4->Name = L"label4";
                        this->label4->Size = System::Drawing::Size(61, 12);
                        this->label4->TabIndex = 10;
                        this->label4->Text = L"description";
                        // 
                        // desc_TextBox
                        // 
                        this->desc_TextBox->Location = System::Drawing::Point(86, 89);
                        this->desc_TextBox->Multiline = true;
                        this->desc_TextBox->Name = L"desc_TextBox";
                        this->desc_TextBox->Size = System::Drawing::Size(521, 23);
                        this->desc_TextBox->TabIndex = 11;
                        // 
                        // ahref_TextBox
                        // 
                        this->ahref_TextBox->Location = System::Drawing::Point(86, 122);
                        this->ahref_TextBox->Multiline = true;
                        this->ahref_TextBox->Name = L"ahref_TextBox";
                        this->ahref_TextBox->ScrollBars = System::Windows::Forms::ScrollBars::Both;
                        this->ahref_TextBox->Size = System::Drawing::Size(517, 63);
                        this->ahref_TextBox->TabIndex = 12;
                        // 
                        // label5
                        // 
                        this->label5->AutoSize = true;
                        this->label5->Location = System::Drawing::Point(12, 125);
                        this->label5->Name = L"label5";
                        this->label5->Size = System::Drawing::Size(35, 12);
                        this->label5->TabIndex = 13;
                        this->label5->Text = L"a href";
                        // 
                        // img_src_TextBox
                        // 
                        this->img_src_TextBox->Location = System::Drawing::Point(86, 194);
                        this->img_src_TextBox->Multiline = true;
                        this->img_src_TextBox->Name = L"img_src_TextBox";
                        this->img_src_TextBox->ScrollBars = System::Windows::Forms::ScrollBars::Both;
                        this->img_src_TextBox->Size = System::Drawing::Size(516, 74);
                        this->img_src_TextBox->TabIndex = 14;
                        // 
                        // label6
                        // 
                        this->label6->AutoSize = true;
                        this->label6->Location = System::Drawing::Point(17, 197);
                        this->label6->Name = L"label6";
                        this->label6->Size = System::Drawing::Size(43, 12);
                        this->label6->TabIndex = 15;
                        this->label6->Text = L"img src";
                        // 
                        // frame_TextBox
                        // 
                        this->frame_TextBox->Location = System::Drawing::Point(86, 274);
                        this->frame_TextBox->Multiline = true;
                        this->frame_TextBox->Name = L"frame_TextBox";
                        this->frame_TextBox->Size = System::Drawing::Size(515, 62);
                        this->frame_TextBox->TabIndex = 16;
                        // 
                        // label7
                        // 
                        this->label7->AutoSize = true;
                        this->label7->Location = System::Drawing::Point(23, 275);
                        this->label7->Name = L"label7";
                        this->label7->Size = System::Drawing::Size(34, 12);
                        this->label7->TabIndex = 17;
                        this->label7->Text = L"frame";
                        // 
                        // Refresh_Button
                        // 
                        this->Refresh_button->Location = System::Drawing::Point(164, 519);
                        this->Refresh_button->Name = L"Refresh_Button";
                        this->Refresh_button->Size = System::Drawing::Size(94, 28);
                        this->Refresh_button->TabIndex = 19;
                        this->Refresh_button->Text = L"再読み込み";
                        this->Refresh_button->UseVisualStyleBackColor = true;
                        this->Refresh_button->Click += gcnew System::EventHandler(this, &Form1::Refresh_Button_Click);
                        // 
                        // Form1
                        // 
                        this->AutoScaleDimensions = System::Drawing::SizeF(6, 12);
                        this->AutoScaleMode = System::Windows::Forms::AutoScaleMode::Font;
                        this->ClientSize = System::Drawing::Size(734, 558);
                        this->Controls->Add(this->Refresh_Button);
                        this->Controls->Add(this->label7);
                        this->Controls->Add(this->frame_TextBox);
                        this->Controls->Add(this->label6);
                        this->Controls->Add(this->img_src_TextBox);
                        this->Controls->Add(this->label5);
                        this->Controls->Add(this->ahref_TextBox);
                        this->Controls->Add(this->desc_TextBox);
                        this->Controls->Add(this->label4);
                        this->Controls->Add(this->label3);
                        this->Controls->Add(this->keyword_TextBox);
                        this->Controls->Add(this->webBrowser1);
                        this->Controls->Add(this->ref_button);
                        this->Controls->Add(this->label2);
                        this->Controls->Add(this->label1);
                        this->Controls->Add(this->title_textBox);
                        this->Controls->Add(this->exit_button);
                        this->Controls->Add(this->read_button);
                        this->Controls->Add(this->url_TextBox);
                        this->Name = L"Form1";
                        this->Text = L"HtmlParse";
                        this->Load += gcnew System::EventHandler(this, &Form1::Form1_Load);
                        this->ResumeLayout(false);
                        this->PerformLayout();

                }
#pragma endregion
        // Extracts metadata from one HTML document and appends it to the result boxes:
        //   - the document title                      -> title_textBox
        //   - content of <meta name="keywords">       -> keyword_TextBox
        //   - content of <meta name="description">    -> desc_TextBox
        //   - href of every <a> element               -> ahref_TextBox
        //   - src of every <img> element              -> img_src_TextBox
        // Every value is appended on its own line; nothing is cleared here, so results
        // from several documents (main page plus frames) accumulate.
        //
        // doc: document to inspect. A null document is ignored.
        private: System::Void webparse(HtmlDocument^ doc){
                        if(doc == nullptr){
                                return;
                        }

                        // A Windows Forms TextBox breaks lines on CR+LF only, so a bare "\n"
                        // would run all entries together on one line.
                        String^ newLine = Environment::NewLine;

                        title_textBox->AppendText(doc->Title + newLine);

                        // Meta names are compared case-insensitively so that e.g. name="Keywords" is also found.
                        for each(HtmlElement^ meta in doc->GetElementsByTagName("meta")){
                                String^ metaName = meta->GetAttribute("name");
                                if(String::Equals(metaName, "keywords", StringComparison::OrdinalIgnoreCase)){
                                        keyword_TextBox->AppendText(meta->GetAttribute("content") + newLine);
                                }else if(String::Equals(metaName, "description", StringComparison::OrdinalIgnoreCase)){
                                        desc_TextBox->AppendText(meta->GetAttribute("content") + newLine);
                                }
                        }

                        for each(HtmlElement^ anchor in doc->GetElementsByTagName("a")){
                                ahref_TextBox->AppendText(anchor->GetAttribute("href") + newLine);
                        }

                        for each(HtmlElement^ image in doc->GetElementsByTagName("img")){
                                img_src_TextBox->AppendText(image->GetAttribute("src") + newLine);
                        }
        }
        // DocumentCompleted handler of webBrowser1.
        //
        // When the completed URL differs from the browser's own URL the event is treated
        // as a frame: its URL is appended to frame_TextBox and nothing else is done.
        // Otherwise the top-level document has finished loading: it is parsed with
        // webparse, followed by the document of every frame it contains.
        //
        // sender: event source (unused).
        // e:      event data; e->Url is the URL of the document that finished loading.
        private: System::Void webReadAfter(System::Object^ sender,System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^ e){

                if(e->Url!=webBrowser1->Url){
                        MessageBox::Show("フレーム" );
                        // CR+LF is required for a visible line break in a Windows Forms TextBox.
                        frame_TextBox->AppendText(e->Url->ToString() + Environment::NewLine);
                        return;
                }
                MessageBox::Show("読み込み終了" );

                HtmlDocument^ doc = webBrowser1->Document;
                if(doc==nullptr){
                        return;
                }
                webparse(doc);

                // Parse the document of each frame contained in the page.
                HtmlWindowCollection^ frames = doc->Window->Frames;
                int count = frames->Count;       // number of contained frames
                for(int n=0;n<count;n++){
                        try{
                                HtmlWindow^ frameWindow = frames[n];              // n-th frame
                                HtmlDocument^ frameDoc = frameWindow->Document;  // document of the n-th frame
                                if(frameDoc!=nullptr){
                                        webparse(frameDoc);
                                }
                        }catch(UnauthorizedAccessException^){
                                // The document of a frame from another domain cannot be read;
                                // skip that frame and continue with the remaining ones.
                        }
                }

        }
        // Click handler of the exit button: closes this form.
        private: System::Void exit_button_Click(System::Object^  sender, System::EventArgs^  e)
        {
                this->Close();
        }
                          

// Click handler of the read button: clears all result boxes, resets the current
// document (if any) and navigates webBrowser1 to the address in url_TextBox.
// The results are filled in later by the DocumentCompleted handler.
private: System::Void read_button_Click(System::Object^  sender, System::EventArgs^  e) {
                        String^ my_url = this->url_TextBox->Text;
                        // The title box is cleared together with the other result boxes;
                        // webparse appends to it, so otherwise titles of earlier pages would pile up.
                        title_textBox->Text="";
                        keyword_TextBox->Text="";
                        desc_TextBox->Text="";
                        ahref_TextBox->Text="";
                        img_src_TextBox->Text="";
                        frame_TextBox->Text="";
                        if(this->webBrowser1->Document!=nullptr){
                                this->webBrowser1->Document->OpenNew(true);
                        }
                        webBrowser1->Navigate(my_url) ;
                 }

// Click handler of the browse button: lets the user pick a local HTML file and,
// when the dialog is confirmed, copies the chosen path into url_TextBox.
private: System::Void ref_button_Click(System::Object^  sender, System::EventArgs^  e) {
                          // Stack semantics: the dialog is disposed automatically when it goes out of scope.
                          OpenFileDialog dialog;
                          dialog.Filter = "HTMLファイル(*.html;*.htm)|*.html;*.htm|すべてのファイル(*.*)|*.*";
                          dialog.Title = "HTMLファイル名を選択";

                          if (dialog.ShowDialog() == System::Windows::Forms::DialogResult::OK) {
                                        this->url_TextBox->Text=dialog.FileName;
                          }
                 }
// Empty handler stub with the DocumentCompleted signature; it performs no work.
private: System::Void webBrowser1_DocumentCompleted(System::Object^  sender,
                                                    System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^  e)
{
}
// Click handler of the reload button: reloads the page shown in webBrowser1.
private: System::Void Refresh_Button_Click(System::Object^  sender, System::EventArgs^  e)
{
        webBrowser1->Refresh();
}
// Load handler of the form; intentionally does nothing.
private: System::Void Form1_Load(System::Object^  sender, System::EventArgs^  e)
{
}
};
}

htmlparse4.cpp

// htmlparse4.cpp : メイン プロジェクト ファイルです。

#include "stdafx.h"
#include "Form1.h"

using namespace htmlparse4;

[STAThreadAttribute]
int main(array<System::String ^> ^args)
{
        // Enable Windows XP visual styles before any control is created.
        Application::EnableVisualStyles();
        Application::SetCompatibleTextRenderingDefault(false);

        // Create the main window and run the application with it.
        Form1^ mainForm = gcnew Form1();
        Application::Run(mainForm);
        return 0;
}