山本ワールド
Windowsプログラミング
アルゴリズム Visual C++ 2008/2013によるWin32/Win64 APIレベルのプログラム 基礎 Visual C++ 2008/2013によるAPIレベルのプログラム(32/64bit) Wix3でインストーラーを作る Visual C++ 2008 Standard Editionによるフォームアプリケーションのプログラム(32/64bit) Visual C++ 2008 Standard EditionによるAPIレベルのプログラム(32/64bit) Windows 7対応 Visual C++ 2008 ExpressによるAPIレベルのプログラム Visual C++ 2005 ExpressによるAPIレベルのプログラム Visual C++ Version 5 BORLAND C++ Windowsプログラム全般 Excel VBA その他MSHTMLを用いHTMLファイルを解析しTitle・Keyword・descriptionタグとリンク、イメージファイル名、フレームについて表示
概要
MSHTMLを用いHTMLファイルを解析しTitle・Keyword・descriptionタグとリンク、イメージファイル名、フレームについて表示します。
動作確認環境
実行ファイル・ソースファイルのダウンロード
ソース
下記以外のファイルは、新規プロジェクトで作成したファイルから変更しておりません。プロジェクトは、CLR・Windowsフォームアプリケーションを選択して作成します。
Form1.h
#pragma once
namespace htmlparse4 {
using namespace System;
using namespace System::ComponentModel;
using namespace System::Collections;
using namespace System::Windows::Forms;
using namespace System::Data;
using namespace System::Drawing;
/// <summary>
/// Form1 の概要
///
/// 警告: このクラスの名前を変更する場合、このクラスが依存するすべての .resx ファイルに関連付けられた
/// マネージ リソース コンパイラ ツールに対して 'Resource File Name' プロパティを
/// 変更する必要があります。この変更を行わないと、
/// デザイナと、このフォームに関連付けられたローカライズ済みリソースとが、
/// 正しく相互に利用できなくなります。
/// </summary>
public ref class Form1 : public System::Windows::Forms::Form
{
public:
Form1()
{
    // All controls are created and wired up by the designer-generated
    // routine; no additional construction logic is required here.
    InitializeComponent();
}
protected:
/// <summary>
/// Cleans up any resources that are in use.
/// </summary>
~Form1()
{
    // Release the designer component container only if one was created.
    if (components != nullptr)
    {
        delete components;
    }
}
private: System::Windows::Forms::TextBox^ url_TextBox;
protected:
protected:
private: System::Windows::Forms::Button^ read_button;
private: System::Windows::Forms::Button^ exit_button;
private: System::Windows::Forms::TextBox^ title_textBox;
private: System::Windows::Forms::Label^ label1;
private: System::Windows::Forms::Label^ label2;
private: System::Windows::Forms::Button^ ref_button;
private: System::Windows::Forms::WebBrowser^ webBrowser1;
private: System::Windows::Forms::TextBox^ keyword_TextBox;
private: System::Windows::Forms::Label^ label3;
private: System::Windows::Forms::Label^ label4;
private: System::Windows::Forms::TextBox^ desc_TextBox;
private: System::Windows::Forms::TextBox^ ahref_TextBox;
private: System::Windows::Forms::Label^ label5;
private: System::Windows::Forms::TextBox^ img_src_TextBox;
private: System::Windows::Forms::Label^ label6;
private: System::Windows::Forms::TextBox^ frame_TextBox;
private: System::Windows::Forms::Label^ label7;
private: System::Windows::Forms::Button^ Refresh_Button;
private:
/// <summary>
/// 必要なデザイナ変数です。
/// </summary>
System::ComponentModel::Container ^components;
#pragma region Windows Form Designer generated code
/// <summary>
/// Required method for designer support. Creates every control, sets its
/// layout properties, attaches the event handlers and adds the controls
/// to the form. Avoid restructuring this method by hand; the designer
/// regenerates it.
/// </summary>
void InitializeComponent(void)
{
    this->url_TextBox = (gcnew System::Windows::Forms::TextBox());
    this->read_button = (gcnew System::Windows::Forms::Button());
    this->exit_button = (gcnew System::Windows::Forms::Button());
    this->title_textBox = (gcnew System::Windows::Forms::TextBox());
    this->label1 = (gcnew System::Windows::Forms::Label());
    this->label2 = (gcnew System::Windows::Forms::Label());
    this->ref_button = (gcnew System::Windows::Forms::Button());
    this->webBrowser1 = (gcnew System::Windows::Forms::WebBrowser());
    this->keyword_TextBox = (gcnew System::Windows::Forms::TextBox());
    this->label3 = (gcnew System::Windows::Forms::Label());
    this->label4 = (gcnew System::Windows::Forms::Label());
    this->desc_TextBox = (gcnew System::Windows::Forms::TextBox());
    this->ahref_TextBox = (gcnew System::Windows::Forms::TextBox());
    this->label5 = (gcnew System::Windows::Forms::Label());
    this->img_src_TextBox = (gcnew System::Windows::Forms::TextBox());
    this->label6 = (gcnew System::Windows::Forms::Label());
    this->frame_TextBox = (gcnew System::Windows::Forms::TextBox());
    this->label7 = (gcnew System::Windows::Forms::Label());
    this->Refresh_Button = (gcnew System::Windows::Forms::Button());
    this->SuspendLayout();
    //
    // url_TextBox
    //
    this->url_TextBox->Location = System::Drawing::Point(87, 6);
    this->url_TextBox->Multiline = true;
    this->url_TextBox->Name = L"url_TextBox";
    this->url_TextBox->Size = System::Drawing::Size(520, 19);
    this->url_TextBox->TabIndex = 0;
    this->url_TextBox->Text = L"http://www.google.co.jp/index.html";
    //
    // read_button
    //
    this->read_button->Location = System::Drawing::Point(14, 518);
    this->read_button->Name = L"read_button";
    this->read_button->Size = System::Drawing::Size(106, 29);
    this->read_button->TabIndex = 1;
    this->read_button->Text = L"読み出し(&O)";
    this->read_button->UseVisualStyleBackColor = true;
    this->read_button->Click += gcnew System::EventHandler(this, &Form1::read_button_Click);
    //
    // exit_button
    //
    this->exit_button->Location = System::Drawing::Point(598, 519);
    this->exit_button->Name = L"exit_button";
    this->exit_button->Size = System::Drawing::Size(104, 28);
    this->exit_button->TabIndex = 2;
    this->exit_button->Text = L"終了(&X)";
    this->exit_button->UseVisualStyleBackColor = true;
    this->exit_button->Click += gcnew System::EventHandler(this, &Form1::exit_button_Click);
    //
    // title_textBox
    //
    // The member is declared as title_textBox (lower-case 't' in "text").
    this->title_textBox->Location = System::Drawing::Point(86, 28);
    this->title_textBox->Multiline = true;
    this->title_textBox->Name = L"title_textBox";
    this->title_textBox->Size = System::Drawing::Size(520, 25);
    this->title_textBox->TabIndex = 3;
    //
    // label1
    //
    this->label1->AutoSize = true;
    this->label1->Location = System::Drawing::Point(12, 9);
    this->label1->Name = L"label1";
    this->label1->Size = System::Drawing::Size(18, 12);
    this->label1->TabIndex = 4;
    this->label1->Text = L"url";
    //
    // label2
    //
    this->label2->AutoSize = true;
    this->label2->Location = System::Drawing::Point(12, 33);
    this->label2->Name = L"label2";
    this->label2->Size = System::Drawing::Size(28, 12);
    this->label2->TabIndex = 5;
    this->label2->Text = L"Title";
    //
    // ref_button
    //
    this->ref_button->Location = System::Drawing::Point(624, 1);
    this->ref_button->Name = L"ref_button";
    this->ref_button->Size = System::Drawing::Size(61, 29);
    this->ref_button->TabIndex = 6;
    this->ref_button->Text = L"参照(&B)";
    this->ref_button->UseVisualStyleBackColor = true;
    this->ref_button->Click += gcnew System::EventHandler(this, &Form1::ref_button_Click);
    //
    // webBrowser1
    //
    this->webBrowser1->Location = System::Drawing::Point(84, 369);
    this->webBrowser1->MinimumSize = System::Drawing::Size(20, 20);
    this->webBrowser1->Name = L"webBrowser1";
    this->webBrowser1->Size = System::Drawing::Size(519, 128);
    this->webBrowser1->TabIndex = 7;
    this->webBrowser1->DocumentCompleted += gcnew System::Windows::Forms::WebBrowserDocumentCompletedEventHandler(this, &Form1::webReadAfter);
    //
    // keyword_TextBox
    //
    this->keyword_TextBox->Location = System::Drawing::Point(86, 59);
    this->keyword_TextBox->Multiline = true;
    this->keyword_TextBox->Name = L"keyword_TextBox";
    this->keyword_TextBox->Size = System::Drawing::Size(521, 24);
    this->keyword_TextBox->TabIndex = 8;
    //
    // label3
    //
    this->label3->AutoSize = true;
    this->label3->Location = System::Drawing::Point(12, 62);
    this->label3->Name = L"label3";
    this->label3->Size = System::Drawing::Size(48, 12);
    this->label3->TabIndex = 9;
    this->label3->Text = L"Keyword";
    //
    // label4
    //
    this->label4->AutoSize = true;
    this->label4->Location = System::Drawing::Point(12, 92);
    this->label4->Name = L"label4";
    this->label4->Size = System::Drawing::Size(61, 12);
    this->label4->TabIndex = 10;
    this->label4->Text = L"description";
    //
    // desc_TextBox
    //
    this->desc_TextBox->Location = System::Drawing::Point(86, 89);
    this->desc_TextBox->Multiline = true;
    this->desc_TextBox->Name = L"desc_TextBox";
    this->desc_TextBox->Size = System::Drawing::Size(521, 23);
    this->desc_TextBox->TabIndex = 11;
    //
    // ahref_TextBox
    //
    this->ahref_TextBox->Location = System::Drawing::Point(86, 122);
    this->ahref_TextBox->Multiline = true;
    this->ahref_TextBox->Name = L"ahref_TextBox";
    this->ahref_TextBox->ScrollBars = System::Windows::Forms::ScrollBars::Both;
    this->ahref_TextBox->Size = System::Drawing::Size(517, 63);
    this->ahref_TextBox->TabIndex = 12;
    //
    // label5
    //
    this->label5->AutoSize = true;
    this->label5->Location = System::Drawing::Point(12, 125);
    this->label5->Name = L"label5";
    this->label5->Size = System::Drawing::Size(35, 12);
    this->label5->TabIndex = 13;
    this->label5->Text = L"a href";
    //
    // img_src_TextBox
    //
    this->img_src_TextBox->Location = System::Drawing::Point(86, 194);
    this->img_src_TextBox->Multiline = true;
    this->img_src_TextBox->Name = L"img_src_TextBox";
    this->img_src_TextBox->ScrollBars = System::Windows::Forms::ScrollBars::Both;
    this->img_src_TextBox->Size = System::Drawing::Size(516, 74);
    this->img_src_TextBox->TabIndex = 14;
    //
    // label6
    //
    this->label6->AutoSize = true;
    this->label6->Location = System::Drawing::Point(17, 197);
    this->label6->Name = L"label6";
    this->label6->Size = System::Drawing::Size(43, 12);
    this->label6->TabIndex = 15;
    this->label6->Text = L"img src";
    //
    // frame_TextBox
    //
    this->frame_TextBox->Location = System::Drawing::Point(86, 274);
    this->frame_TextBox->Multiline = true;
    this->frame_TextBox->Name = L"frame_TextBox";
    this->frame_TextBox->Size = System::Drawing::Size(515, 62);
    this->frame_TextBox->TabIndex = 16;
    //
    // label7
    //
    this->label7->AutoSize = true;
    this->label7->Location = System::Drawing::Point(23, 275);
    this->label7->Name = L"label7";
    this->label7->Size = System::Drawing::Size(34, 12);
    this->label7->TabIndex = 17;
    this->label7->Text = L"frame";
    //
    // Refresh_Button
    //
    // The member is declared as Refresh_Button (capital 'B').
    this->Refresh_Button->Location = System::Drawing::Point(164, 519);
    this->Refresh_Button->Name = L"Refresh_Button";
    this->Refresh_Button->Size = System::Drawing::Size(94, 28);
    this->Refresh_Button->TabIndex = 19;
    this->Refresh_Button->Text = L"再読み込み";
    this->Refresh_Button->UseVisualStyleBackColor = true;
    this->Refresh_Button->Click += gcnew System::EventHandler(this, &Form1::Refresh_Button_Click);
    //
    // Form1
    //
    this->AutoScaleDimensions = System::Drawing::SizeF(6, 12);
    this->AutoScaleMode = System::Windows::Forms::AutoScaleMode::Font;
    this->ClientSize = System::Drawing::Size(734, 558);
    this->Controls->Add(this->Refresh_Button);
    this->Controls->Add(this->label7);
    this->Controls->Add(this->frame_TextBox);
    this->Controls->Add(this->label6);
    this->Controls->Add(this->img_src_TextBox);
    this->Controls->Add(this->label5);
    this->Controls->Add(this->ahref_TextBox);
    this->Controls->Add(this->desc_TextBox);
    this->Controls->Add(this->label4);
    this->Controls->Add(this->label3);
    this->Controls->Add(this->keyword_TextBox);
    this->Controls->Add(this->webBrowser1);
    this->Controls->Add(this->ref_button);
    this->Controls->Add(this->label2);
    this->Controls->Add(this->label1);
    this->Controls->Add(this->title_textBox);
    this->Controls->Add(this->exit_button);
    this->Controls->Add(this->read_button);
    this->Controls->Add(this->url_TextBox);
    this->Name = L"Form1";
    this->Text = L"HtmlParse";
    this->Load += gcnew System::EventHandler(this, &Form1::Form1_Load);
    this->ResumeLayout(false);
    this->PerformLayout();
}
#pragma endregion
/// <summary>
/// Scans one HTML document and appends what it finds to the result boxes:
/// the document title, the content of meta "keywords" and "description"
/// tags, the href of every anchor and the src of every image.
/// </summary>
/// <param name="doc">Document to scan; a null reference is ignored.</param>
private: System::Void webparse(HtmlDocument^ doc){
    // Nothing to extract when no document is available.
    if (doc == nullptr) {
        return;
    }

    // A multiline TextBox needs CR+LF to start a new line; a bare "\n"
    // would leave all entries on a single line.
    String^ eol = Environment::NewLine;

    // The title field is declared as title_textBox (lower-case 't').
    title_textBox->AppendText(doc->Title + eol);

    // meta keywords / description
    HtmlElementCollection^ elements = doc->GetElementsByTagName("meta");
    for (int i = 0; i < elements->Count; i++) {
        HtmlElement^ meta = elements[i];
        String^ name = meta->GetAttribute("name");
        // Compare case-insensitively so name="Keywords" etc. are also matched.
        if (String::Equals(name, L"keywords", StringComparison::OrdinalIgnoreCase)) {
            keyword_TextBox->AppendText(meta->GetAttribute("content") + eol);
        } else if (String::Equals(name, L"description", StringComparison::OrdinalIgnoreCase)) {
            desc_TextBox->AppendText(meta->GetAttribute("content") + eol);
        }
    }

    // link targets
    elements = doc->GetElementsByTagName("a");
    for (int i = 0; i < elements->Count; i++) {
        ahref_TextBox->AppendText(elements[i]->GetAttribute("href") + eol);
    }

    // image sources
    elements = doc->GetElementsByTagName("img");
    for (int i = 0; i < elements->Count; i++) {
        img_src_TextBox->AppendText(elements[i]->GetAttribute("src") + eol);
    }
}
/// <summary>
/// DocumentCompleted handler of webBrowser1. A completion whose URL differs
/// from the browser's URL is treated as a frame: its URL is logged to
/// frame_TextBox and nothing else happens. Otherwise the main document and
/// the document of each of its frames are passed to webparse.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data carrying the URL of the completed document.</param>
private: System::Void webReadAfter(System::Object^ sender,System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^ e){
    if (e->Url == webBrowser1->Url) {
        MessageBox::Show("読み込み終了" );
    } else {
        MessageBox::Show("フレーム" );
        // CR+LF is required for a line break in a multiline TextBox.
        frame_TextBox->AppendText(e->Url->ToString() + Environment::NewLine);
        return;
    }

    HtmlDocument^ doc = webBrowser1->Document;
    if (doc == nullptr) {
        return;
    }
    webparse(doc);

    // Walk every frame contained in the main document.
    HtmlWindowCollection^ frames = doc->Window->Frames;
    int count = frames->Count; // number of contained frames
    for (int n = 0; n < count; n++) {
        try {
            HtmlWindow^ frameWindow = frames[n];                // n-th frame
            HtmlDocument^ frameDocument = frameWindow->Document; // its document
            webparse(frameDocument);
        } catch (UnauthorizedAccessException^) {
            // NOTE(review): reading the document of a frame served from
            // another domain is expected to be refused by the browser
            // control; skip that frame instead of aborting the whole parse.
        }
    }
}
/// <summary>
/// Click handler of the exit button: closes the form.
/// </summary>
private: System::Void exit_button_Click(System::Object^ sender, System::EventArgs^ e) {
    this->Close();
}
/// <summary>
/// Click handler of the read button: clears every result box, resets the
/// currently loaded document (if any) and navigates the browser control to
/// the URL typed into url_TextBox.
/// </summary>
private: System::Void read_button_Click(System::Object^ sender, System::EventArgs^ e) {
    String^ my_url = this->url_TextBox->Text;

    // Reset all result boxes. The title box must be cleared too: webparse
    // appends to it, so titles of earlier pages would otherwise pile up.
    title_textBox->Text="";
    keyword_TextBox->Text="";
    desc_TextBox->Text="";
    ahref_TextBox->Text="";
    img_src_TextBox->Text="";
    frame_TextBox->Text="";

    // Replace the previously loaded document before navigating.
    if(this->webBrowser1->Document!=nullptr){
        this->webBrowser1->Document->OpenNew(true);
    }
    webBrowser1->Navigate(my_url) ;
}
/// <summary>
/// Click handler of the browse button: lets the user pick a local HTML file
/// and copies the chosen path into url_TextBox.
/// </summary>
private: System::Void ref_button_Click(System::Object^ sender, System::EventArgs^ e) {
    // Stack semantics: the dialog is disposed automatically when this
    // function returns, instead of lingering until finalization.
    OpenFileDialog openFileDialog1;
    openFileDialog1.Filter = "HTMLファイル(*.html;*.htm)|*.html;*.htm|すべてのファイル(*.*)|*.*";
    openFileDialog1.Title = "HTMLファイル名を選択";
    if (openFileDialog1.ShowDialog() == System::Windows::Forms::DialogResult::OK) {
        this->url_TextBox->Text=openFileDialog1.FileName;
    }
}
// Empty DocumentCompleted-style handler. In the visible code the browser's
// DocumentCompleted event is attached to webReadAfter, not to this method,
// so nothing in this file invokes it.
private: System::Void webBrowser1_DocumentCompleted(System::Object^ sender, System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^ e) {
}
/// <summary>
/// Click handler of the reload button: asks the browser control to reload
/// the page it is currently showing.
/// </summary>
private: System::Void Refresh_Button_Click(System::Object^ sender, System::EventArgs^ e) {
    webBrowser1->Refresh();
}
// Handler for the form's Load event (attached in InitializeComponent).
// It currently performs no work.
private: System::Void Form1_Load(System::Object^ sender, System::EventArgs^ e) {
}
};
}
htmlparse4.cpp
// htmlparse4.cpp : メイン プロジェクト ファイルです。
#include "stdafx.h"
#include "Form1.h"
using namespace htmlparse4;
[STAThreadAttribute]
int main(array<System::String ^> ^args)
{
    // Visual styles have to be switched on before any control is created.
    Application::EnableVisualStyles();
    Application::SetCompatibleTextRenderingDefault(false);

    // Build the main window, then run the message loop until it is closed.
    Form1^ mainForm = gcnew Form1();
    Application::Run(mainForm);
    return 0;
}
Copyright (C) 2012 山本ワールド All Rights Reserved.