stringによる文字列処理の変更点

追加された行はこの色です。
削除された行はこの色です。
stringによる文字列処理へ行く。
stringによる文字列処理の差分を削除
std::stringによる文字列，テキストファイル処理などに関するサンプルコード

----
#contents
----


*インクルードファイル [#rdb23906]
このページのコードはstringをインクルードしていることが前提である．
 #include <string>
一部の処理はsstreamも使っている．
 #include <sstream>
また，名前空間stdは省略できるように以下を宣言しておく．
 using namespace std;


*基本的な処理 [#j47a31bd]
***stringへの変換 [#bb1ddb4e]
sstreamで様々な型の変数をstringへ変換する．
#code(C){{
/*!
 * Convert a value of any stream-insertable type to a string.
 * @param[in] x value to convert (must support operator<< on an output stream)
 * @return the text produced by inserting x into a string stream
 */
template<typename T>
inline string RX_TO_STRING(const T &x)
{
	ostringstream buffer;
	buffer << x;
	const string text = buffer.str();
	return text;
}
}}

***"<<"オペレータ [#e0a64ade]
上記のRX_TO_STRINGを使ってstringに"<<"オペレータを定義．
#code(C){{
//! Stream-style append for string: converts a with RX_TO_STRING and appends the result to cb.
//! Returns cb itself so that several insertions can be chained.
template<typename T>
inline string &operator<<(string &cb, const T &a)
{
	const string piece = RX_TO_STRING(a);
	cb.append(piece);
	return cb;
}
}}

***小文字化 [#l7330ce4]
#code(C){{
/*!
 * Convert the ASCII upper-case letters of a string to lower case, in place.
 *  - only 'A'..'Z' are changed; every other character is left untouched
 * @param[inout] str string to convert
 */
inline void StringToLower(string &str)
{
	for(string::iterator it = str.begin(); it != str.end(); ++it){
		const char ch = *it;
		if(ch < 'A' || ch > 'Z') continue;
		*it = ch + 32;	// distance between 'A' and 'a' in ASCII
	}
}
}}

***数値判定 [#z1afc674]
#code(C){{
/*!
 * Check whether a string represents an integer value.
 *  - accepted form: optional blanks (space/tab), optional sign ('-' or '+'),
 *    one or more decimal digits, optional blanks
 *  - an empty string, a lone sign, or digits separated by blanks/signs are rejected
 * @param[in] str string to examine
 * @return true if str represents an integer
 */
inline bool IsInteger(const string &str)
{
	const string::size_type n = str.size();
	string::size_type i = 0;

	// leading blanks
	while(i < n && (str[i] == ' ' || str[i] == '\t')) ++i;

	// optional sign
	if(i < n && (str[i] == '-' || str[i] == '+')) ++i;

	// at least one digit is required
	const string::size_type digits_begin = i;
	while(i < n && str[i] >= '0' && str[i] <= '9') ++i;
	if(i == digits_begin){
		return false;
	}

	// trailing blanks
	while(i < n && (str[i] == ' ' || str[i] == '\t')) ++i;

	// anything left over means the string is not a plain integer
	return i == n;
}

/*!
 * Check whether a string represents a real (floating-point) value.
 *  - accepted form: optional blanks (space/tab), optional sign, decimal digits with
 *    at most one '.', optional exponent ('e' or 'E', optional sign, digits), optional blanks
 *  - at least one digit is required in the mantissa and, if present, in the exponent
 * @param[in] str string to examine
 * @return true if str represents a real number
 */
inline bool IsNumeric(const string &str)
{
	const string::size_type n = str.size();
	string::size_type i = 0;

	// leading blanks
	while(i < n && (str[i] == ' ' || str[i] == '\t')) ++i;

	// optional sign
	if(i < n && (str[i] == '-' || str[i] == '+')) ++i;

	// mantissa: digits, optional '.', digits
	string::size_type ndigits = 0;
	while(i < n && str[i] >= '0' && str[i] <= '9'){ ++i; ++ndigits; }
	if(i < n && str[i] == '.'){
		++i;
		while(i < n && str[i] >= '0' && str[i] <= '9'){ ++i; ++ndigits; }
	}
	if(ndigits == 0){
		return false;
	}

	// optional exponent
	if(i < n && (str[i] == 'e' || str[i] == 'E')){
		++i;
		if(i < n && (str[i] == '-' || str[i] == '+')) ++i;
		const string::size_type exp_begin = i;
		while(i < n && str[i] >= '0' && str[i] <= '9') ++i;
		if(i == exp_begin){
			return false;
		}
	}

	// trailing blanks
	while(i < n && (str[i] == ' ' || str[i] == '\t')) ++i;

	return i == n;
}
}}

***空白削除 [#kc5020d1]
文字列に含まれるすべての空白(半角/全角スペース，タブ)を削除
#code(C){{
/*!
 * Remove every blank (space, tab, full-width space) from a string, in place.
 *  - the full-width space is a multibyte sequence, so it is matched as a whole
 *    substring; matching its bytes one by one would also strip those bytes from
 *    other multibyte characters and corrupt them
 *  - the string is rebuilt in a single pass instead of erasing one character at a time
 * @param[inout] buf string to process
 */
inline void DeleteSpace(string &buf)
{
	const string wide_space("　");	// full-width space in the source file's encoding

	string kept;
	kept.reserve(buf.size());

	size_t i = 0;
	while(i < buf.size()){
		if(buf[i] == ' ' || buf[i] == '\t'){
			++i;
		}
		else if(!wide_space.empty() && buf.compare(i, wide_space.size(), wide_space) == 0){
			i += wide_space.size();
		}
		else{
			kept += buf[i];
			++i;
		}
	}

	buf.swap(kept);
}
}}

先頭の空白のみを削除する場合．
#code(C){{
/*!
 * Remove the leading blanks (space, tab, full-width space) of a string, in place.
 *  - the full-width space is matched as a whole multibyte sequence so that the
 *    first byte of another multibyte character is never stripped
 *  - the blank prefix is measured first and erased with a single call
 * @param[inout] buf string to process
 */
inline void DeleteHeadSpace(string &buf)
{
	const string wide_space("　");	// full-width space in the source file's encoding

	size_t head = 0;
	while(head < buf.size()){
		if(buf[head] == ' ' || buf[head] == '\t'){
			++head;
		}
		else if(!wide_space.empty() && buf.compare(head, wide_space.size(), wide_space) == 0){
			head += wide_space.size();
		}
		else{
			break;
		}
	}

	buf.erase(0, head);
}
}}

***文字列カウント [#m0251b47]
ある文字列が別の文字列中に含まれている数．
#code(C){{
/*!
 * Count the occurrences of a substring that separate parts of a string.
 *  - the search starts at offset
 *  - an occurrence found exactly at the current search position (at offset, or
 *    directly behind the previous occurrence) is not counted; one character is
 *    erased from s at that position instead, so s MAY BE MODIFIED
 *  - an occurrence that ends the string is not counted
 * @param[inout] s string to search (modified as described above)
 * @param[in] offset position in s at which the search starts
 * @param[in] c substring to look for
 * @return number of counted occurrences (never negative; 0 if c is empty)
 */
inline int CountString(string &s, int offset, string c)
{
	// an empty pattern matches at every position: the loop below would never
	// advance and would finally erase past the end of s
	if(c.empty()){
		return 0;
	}

	int count = 0;
	size_t pos0 = offset, pos = 0;
	const size_t n = c.size();

	while((pos = s.find(c, pos0)) != string::npos){
		if(pos != pos0){
			count++;
		}
		else{
			s.erase(s.begin()+pos);
		}
		pos0 = pos+n;
	}

	// do not count an occurrence that terminates the string.
	// s.size() >= n keeps s.size()-n from wrapping around (it would then compare
	// equal to npos), count > 0 keeps the result from becoming negative.
	if(count > 0 && s.size() >= n && s.rfind(c) == s.size()-n){
		count--;
	}

	return count;
}
}}

***ワイド文字列とマルチバイト文字列の相互変換 [#b00d8ac3]
std::stringとstd::wstringの変換
std::stringとstd::wstringの変換．エントリ関数の最初の方で
 setlocale(LC_CTYPE, "JPN");
などとしてロケールを設定しておくこと．

#code(C){{
#include <string>
#include <stdlib.h>

/*!
 * Convert a wide string to a multibyte string.
 *  - the conversion follows the current C locale (LC_CTYPE), so the caller must
 *    have called setlocale, e.g. setlocale(LC_CTYPE, "JPN");
 *  - conversion stops at the first L'\0' contained in src
 * @param[in] src wide string
 * @return multibyte string; empty if src contains a character that cannot be
 *         represented in the current locale
 */
inline std::string W2S(const std::wstring &src)
{
	// worst case: every wide character becomes MB_CUR_MAX bytes, plus the terminator
	const size_t mbsize = src.length()*MB_CUR_MAX+1;
	std::string dst(mbsize, '\0');

	const size_t written = wcstombs(&dst[0], src.c_str(), mbsize);
	if(written == (size_t)-1){
		// conversion error: the buffer content is unspecified, do not use it
		return std::string();
	}

	dst.resize(written);	// drop the unused tail of the buffer
	return dst;
}

/*!
 * Convert a multibyte string to a wide string.
 *  - the conversion follows the current C locale (LC_CTYPE), so the caller must
 *    have called setlocale, e.g. setlocale(LC_CTYPE, "JPN");
 *  - conversion stops at the first '\0' contained in src
 * @param[in] src multibyte string
 * @return wide string; empty if src contains an invalid multibyte sequence
 */
inline std::wstring S2W(const std::string &src)
{
	// a multibyte string never yields more wide characters than it has bytes
	const size_t wcsize = src.length()+1;
	std::wstring dst(wcsize, L'\0');

	const size_t converted = mbstowcs(&dst[0], src.c_str(), wcsize);
	if(converted == (size_t)-1){
		// conversion error: the buffer content is unspecified, do not use it
		return std::wstring();
	}

	dst.resize(converted);	// drop the unused tail of the buffer
	return dst;
}
}}



*ファイル名処理 [#y2302683]

***フルパスからファイル名を抽出 [#q5713629]
パス区切り"\"と"/"の両方に対応
#code(C){{
/*!
 * Extract the file name from a path.
 *  - both '\' and '/' are treated as separators, also when they are mixed in one
 *    path: the text after the LAST separator of either kind is returned
 * @param[in] path path
 * @return file name (the whole path if it contains no separator)
 */
inline string GetFileName(const string &path)
{
    const size_t sep = path.find_last_of("\\/");
    if(sep == string::npos){
        return path;
    }

    return path.substr(sep+1);
}
}}

***フルパスからフォルダパスを抽出 [#pf6985f9]
パス区切り"\"と"/"の両方に対応．
#code(C){{
/*!
 * Extract the folder part of a path (the path without its file name).
 *  - both '\' and '/' are treated as separators, also when they are mixed in one
 *    path: everything up to and including the LAST separator is returned
 * @param[in] path path
 * @return folder path including the trailing separator ("" if there is no separator)
 */
inline string GetFolderPath(const string &path)
{
    const size_t sep = path.find_last_of("\\/");
    if(sep == string::npos){
        return "";
    }

    return path.substr(0, sep+1);
}
}}

***フルパスから親フォルダ名を抽出
パス区切り"\"と"/"の両方に対応．
#code(C){{
/*!
 * Extract the name of the folder that directly contains a file.
 *  - both '\' and '/' are treated as separators
 *  - the name is the text between the last two separators of the path
 * @param[in] path file path
 * @return parent folder name ("" if the path contains fewer than two separators)
 */
inline string GetParentFolderName(const string &path)
{
	const std::string::size_type pos1 = path.find_last_of("\\/");

	// pos1 == 0 must be handled here: pos1-1 would wrap around and make the
	// second search find the very same separator again
	if(pos1 == std::string::npos || pos1 == 0){
		return "";
	}

	const std::string::size_type pos0 = path.find_last_of("\\/", pos1-1);
	if(pos0 == std::string::npos){
		return "";
	}

	return path.substr(pos0+1, pos1-pos0-1);
}
}}

***フルパスから拡張子を抽出 [#j209b086]
パス区切り"\"と"/"の両方に対応．
#code(C){{
/*!
 * Extract the extension of a path, converted to lower case.
 *  - only a '.' located behind the last separator ('\' or '/') starts an
 *    extension, so a dot inside a folder name is ignored
 *  - trailing '\0' and space characters are removed from the result
 * @param[in] path file path
 * @return lower-case extension without the dot ("" if there is none)
 */
inline string GetExtension(const string &path)
{
	string ext;

	const size_t dot = path.rfind('.');
	if(dot == string::npos){
		return ext;
	}

	// the last dot belongs to a folder name, not to the file name
	const size_t sep = path.find_last_of("\\/");
	if(sep != string::npos && sep > dot){
		return ext;
	}

	ext = path.substr(dot+1);

	// guard against '\0' or spaces at the end of the path
	while(!ext.empty() && (ext[ext.size()-1] == '\0' || ext[ext.size()-1] == ' ')){
		ext.erase(ext.size()-1);
	}

	for(string::iterator itr = ext.begin(); itr != ext.end(); ++itr){
		// tolower requires a value representable as unsigned char
		*itr = (char)tolower((unsigned char)*itr);
	}

	return ext;
}
}}

***ファイル名から拡張子を削除 [#accc1053]
#code(C){{
/*!
 * Remove the extension from a file name.
 *  - only a '.' located behind the last separator ('\' or '/') is treated as the
 *    start of the extension, so dots in folder names ("../file", "dir.v1/file")
 *    leave the path unchanged
 * @param[in] fn file name (full path or relative path)
 * @return fn without its extension (fn itself if it has no extension)
 */
inline string ExtractPathWithoutExt(const string &fn)
{
	const string::size_type dot = fn.find_last_of('.');
	if(dot == string::npos){
		return fn;
	}

	// the last dot belongs to a folder name, not to the file name
	const string::size_type sep = fn.find_last_of("\\/");
	if(sep != string::npos && sep > dot){
		return fn;
	}

	return fn.substr(0, dot);
}
}}

***ファイル名を抽出(拡張子を除くフラグ付き) [#w175146e]
#code(C){{
/*!
 * Extract the file name from a path, optionally without its extension.
 *  - both '/' and '\' are treated as separators, also when they are mixed in one
 *    path: the text after the LAST separator of either kind is used
 * @param[in] path file path
 * @param[in] without_extension if true, everything from the last '.' of the file name on is removed
 * @return file name
 */
string ExtractFileName(const string &path, bool without_extension = true)
{
	const string::size_type sep = path.find_last_of("/\\");
	string fn = (sep == string::npos) ? path : path.substr(sep+1);

	if(without_extension){
		const string::size_type dot = fn.find_last_of('.');
		if(dot != string::npos){
			fn.erase(dot);
		}
	}

	return fn;
}
}}

***フォルダ区切り位置の検索 [#b34b4573]
#code(C){{
/*!
 * Find the last folder separator of a path.
 *  - '\' and '/' are both separators and may be mixed; the position of the last
 *    separator of either kind is reported
 * @param[in] str file or folder path
 * @param[out] pos position of the last separator (left untouched if none is found)
 * @return true if a separator was found
 */
inline bool FindPathBound(const string &str, std::string::size_type &pos)
{
	const std::string::size_type found = str.find_last_of("\\/");
	if(found == std::string::npos){
		return false;
	}

	pos = found;
	return true;
}
}}


*テキストファイル処理 [#p82bfaa5]

***テキストファイル行処理 [#xed650c8]
getlineでテキストファイルの各行を取り出して処理を行うサンプル．
コメント行や空行はスキップする．
#code(C){{
/*!
 * Read a text file line by line and process every line.
 *  - everything from '#' to the end of a line is treated as a comment
 *  - leading blanks are removed; lines that are empty afterwards are skipped
 *  - the loop is driven by the result of getline, so it also ends when the
 *    stream fails for a reason other than end-of-file
 * @param[in] file_name file name
 * @return false if the file could not be opened
 */
bool Read(string file_name)
{
	ifstream file(file_name.c_str());
	if(!file){
		cout << "Read : Invalid file specified" << endl;
		return false;
	}

	string buf;
	while(getline(file, buf)){
		// ignore everything from '#' on (comment)
		const string::size_type comment_start = buf.find('#');
		if(comment_start != string::npos){
			buf.erase(comment_start);
		}

		// remove leading spaces and tabs
		DeleteHeadSpace(buf);

		// skip empty lines
		if(buf.empty()){
			continue;
		}

		// process the line string here

	}

	// the stream is closed by its destructor
	return true;
}
}}

各行の文字列処理に使えそうな関数を以下に挙げる．


***カンマ区切り文字列からの要素抽出 [#ped4fede]
#code(C){{
/*!
 * Extract the element that starts at pos and ends before the next ",".
 *  - if the element starts with a double quotation mark and a closing mark
 *    exists, the text between the two marks is extracted instead
 *  - a pos outside the string (negative, or at/after the end) extracts nothing;
 *    this makes it safe to pass the value returned by a previous call
 * @param[in] src source string
 * @param[out] sub extracted element
 * @param[in] pos position at which the search starts
 * @return start position of the next element (behind the "," and any spaces
 *         following it), or (int)string::npos if there is no further element
 */
inline int GetNextString(const string &src, string &sub, int pos)
{
	if(pos < 0 || (size_t)pos >= src.size()){
		sub.clear();
		return (int)string::npos;
	}

	bool extracted = false;
	if(src[pos] == '\"'){	// quoted element
		const size_t close = src.find('\"', pos+1);
		if(close != string::npos){
			sub = src.substr(pos+1, close-(pos+1));
			pos = (int)(close+1);
			extracted = true;
		}
	}

	const size_t comma = src.find(',', pos);
	if(comma == string::npos){
		// last element: take the rest of the string
		if(!extracted) sub = src.substr(pos);
		return (int)string::npos;
	}

	if(!extracted) sub = src.substr(pos, comma-pos);

	// skip the spaces that follow the ","
	size_t next = comma+1;
	while(next < src.size() && src[next] == ' '){
		++next;
	}

	return (next >= src.size()) ? (int)string::npos : (int)next;
}


/*!
 * Extract the first comma-separated element of a string.
 *  - same as GetNextString() with a search start position of 0
 * @param[in] src source string
 * @param[out] sub extracted element
 * @return the value returned by GetNextString(src, sub, 0)
 */
inline int GetFirstString(const string &src, string &sub)
{
	const int head = 0;	// always start at the beginning of src
	const int next = GetNextString(src, sub, head);
	return next;
}
}}

-使用例
#code(C){{
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
// Usage example: print every comma-separated element of each line of test.csv.
// Returns 1 if the file cannot be opened, 0 otherwise.
int main(void)
{
	ifstream file("test.csv");
	if(!file){
		return 1;
	}

	string buf;
	// driven by the result of getline: also ends on a read error, not only at end-of-file
	while(getline(file, buf)){
		// ignore everything from '#' on (comment)
		const string::size_type comment_start = buf.find('#');
		if(comment_start != string::npos){
			buf.erase(comment_start);
		}

		// skip empty lines
		if(buf.empty()){
			continue;
		}

		// GetNextString returns (int)string::npos after the last element
		string sub;
		int pos = 0;
		do{
			pos = GetNextString(buf, sub, pos);
			cout << sub << endl;
		}while(pos != (int)string::npos);
		cout << endl;
	}

	// the stream is closed by its destructor
	return 0;
}
}}
test.csvファイルの内容が以下であった場合，
 # カンマ区切りテキスト
 1, 23, 456,789,  10
 
 "double quotation", abc,"defg, hijk"
出力は以下となる．
 1
 23
 456
 789
 10
 
 double quotation
 abc
 defg, hijk


***文字列からベクトル [#u97e7fe5]
"(0, 1, 2)"のように書かれた文字列からベクトル要素を取り出す．
3次元ベクトル，2次元ベクトルについてのサンプルを示す．
以下のサンプルでは，3 or 2次元ベクトルを表すクラスVec3,Vec2を使っている(各要素のアクセスはオペレータ"[]"で行う)．
#code(C){{
/*!
 * Convert a string of the form "(x, y, z)" to a Vec3.
 *  - parsing starts at the first '('; each element ends at the next ',' or,
 *    if there is no further ',', at the next ')'
 *  - if fewer than 3 elements are given, the remaining ones are set to the last
 *    parsed element, e.g. "(x)" becomes (x, x, x)
 *  - if no element can be parsed at all, v stays Vec3(0.0)
 * @param[in] s string
 * @param[out] v resulting value
 * @return number of elements written in the string
 */
inline int StringToVec3(const string &s, Vec3 &v)
{
	int vcount = 0;
	v = Vec3(0.0);

	size_t pos = s.find('(');
	while(pos != string::npos && vcount < 3){
		// the element ends at the next ',' or, failing that, at the next ')'
		size_t pos1 = s.find(',', pos+1);
		const bool last = (pos1 == string::npos);
		if(last){
			pos1 = s.find(')', pos+1);
			if(pos1 == string::npos){
				break;
			}
		}

		v[vcount] = atof(s.substr(pos+1, pos1-(pos+1)).c_str());
		vcount++;

		if(last){
			break;
		}
		pos = pos1;
	}

	// fill the missing elements with the last parsed one.
	// vcount == 0 must be excluded: v[vcount-1] would be v[-1]
	if(vcount > 0){
		for(int i = vcount; i < 3; ++i){
			v[i] = v[vcount-1];
		}
	}

	return vcount;
}

/*!
 * Convert a string of the form "(x, y)" to a Vec2.
 *  - parsing starts at the first '('; each element ends at the next ',' or,
 *    if there is no further ',', at the next ')'
 *  - if only one element is given, the second one is set to the same value,
 *    i.e. "(x)" becomes (x, x)
 *  - if no element can be parsed at all, v stays Vec2(0.0)
 * @param[in] s string
 * @param[out] v resulting value
 * @return number of elements written in the string
 */
inline int StringToVec2(const string &s, Vec2 &v)
{
	int vcount = 0;
	v = Vec2(0.0);

	size_t pos = s.find('(');
	while(pos != string::npos && vcount < 2){
		// the element ends at the next ',' or, failing that, at the next ')'
		size_t pos1 = s.find(',', pos+1);
		const bool last = (pos1 == string::npos);
		if(last){
			pos1 = s.find(')', pos+1);
			if(pos1 == string::npos){
				break;
			}
		}

		v[vcount] = atof(s.substr(pos+1, pos1-(pos+1)).c_str());
		vcount++;

		if(last){
			break;
		}
		pos = pos1;
	}

	// fill the missing elements with the last parsed one.
	// vcount == 0 must be excluded: v[vcount-1] would be v[-1]
	if(vcount > 0){
		for(int i = vcount; i < 2; ++i){
			v[i] = v[vcount-1];
		}
	}

	return vcount;
}
}}


***文字列からベクトル(汎用版) [#hfc255d2]
上記の2/3次元ベクトル版をより汎用的に使えるように，
-多次元ベクトルに対応
-文字列中に複数のベクトルがある場合にも対応
-区切り文字を呼び出し側が指定
にしたもの．

#code(C){{
/*!
 * Extract vector values from a string.
 *  - e.g. for the text "0, 1, 2" written inside double quotation marks, the
 *    quotation mark is end_str and "," is brk_str
 *  - data is split at every end_str; empty pieces (data starting with end_str,
 *    two end_str in a row) are skipped, they do not stop the extraction
 *  - each piece is split at brk_str, blanks are removed from every element and
 *    the element is converted with atof
 * @param[in] data source string
 * @param[in] end_str delimiter between vectors
 * @param[in] brk_str delimiter between the elements of a vector
 * @param[in] min_elems minimum number of elements a vector must have to be stored
 * @param[out] vecs extracted vectors are appended here
 * @return number of vectors found (0 if end_str or brk_str is empty)
 */
template<class T> 
inline int ExtractVector(string data, const string &end_str, const string &brk_str, int min_elems, 
						 vector< vector<T> > &vecs)
{
	// an empty delimiter matches everywhere and would keep the loops from advancing
	if(end_str.empty() || brk_str.empty()){
		return 0;
	}

	data += end_str;	// append a delimiter so that the last vector is terminated, too
	int n = 0;
	size_t cpos[2] = {0, 0};
	while((cpos[1] = data.find(end_str, cpos[0])) != string::npos){
		// take the text between the previous delimiter and this one
		string sub = data.substr(cpos[0], cpos[1]-cpos[0]);
		cpos[0] = cpos[1]+end_str.size();
		if(sub.empty()){
			continue;
		}

		// split the piece into vector elements
		sub += brk_str;
		vector<T> val;
		size_t spos[2] = {0, 0};
		while((spos[1] = sub.find(brk_str, spos[0])) != string::npos){
			string val_str = sub.substr(spos[0], spos[1]-spos[0]);
			spos[0] = spos[1]+brk_str.size();

			DeleteSpace(val_str);
			if(val_str.empty()){
				continue;
			}

			val.push_back((T)atof(val_str.c_str()));
		}
		if((int)val.size() >= min_elems){
			vecs.push_back(val);
			n++;
		}
	}

	return n;
}
}}



*バイナリファイル [#c1a470ff]
***バイナリファイルからの文字列入力 [#d2e6714f]
#code(C){{
/*!
 * Read one value of type T from a binary file.
 *  - sizeof(T) bytes are read directly into the object representation of the result
 *  - the result is value-initialized first, so a failed read returns T() instead
 *    of an indeterminate value
 * @param[inout] file file input stream
 * @return value read from the stream
 */
template<class T> 
inline T ReadBinary(ifstream &file)
{
	T data = T();
	file.read((char*)&data, sizeof(T));
	return data;
}

/*!
 * Read a value of type T from a binary file, consuming a given number of bytes.
 *  - the bytes are stored at the start of the object representation of a
 *    zero-initialized T
 *  - if byte is larger than sizeof(T), only sizeof(T) bytes are stored and the
 *    remaining bytes are skipped, so the result object is never overrun
 *  - if byte is 0 or negative, nothing is read
 * @param[inout] file file input stream
 * @param[in] byte number of bytes to consume
 * @return value read from the stream
 */
template<class T> 
inline T ReadBinary(ifstream &file, int byte)
{
	T data = 0;
	if(byte <= 0){
		return data;
	}

	const int capacity = (int)sizeof(T);
	if(byte <= capacity){
		file.read((char*)&data, byte);
	}
	else{
		file.read((char*)&data, capacity);
		file.ignore(byte-capacity);	// keep the stream position consistent with the request
	}

	return data;
}

/*!
 * Read 2 bytes from a binary file and return them in an int.
 *  - the two bytes are copied to the start of the object representation of a
 *    zero-initialized int, so the resulting number depends on the host byte order
 * @param[inout] file file input stream
 * @return value read from the stream
 */
inline int ReadInt(ifstream &file)
{
	const int kBytes = 2;
	int value = 0;
	char *raw = (char*)&value;
	file.read(raw, kBytes);
	return value;
}

/*!
 * Read a string from a binary file.
 *  - characters are read until a '\0' is found (it is consumed but not stored),
 *    until max_size characters have been stored, or until the stream can deliver
 *    no more characters
 *  - name never contains the terminating '\0'
 * @param[inout] file file input stream
 * @param[out] name string read from the stream (empty if there is none)
 * @param[in] max_size maximum number of characters to read (-1: read up to '\0')
 * @return false if there is no string (the stream starts with '\0' or nothing could be read)
 */
inline bool ReadString(ifstream &file, string &name, int max_size)
{
	name.clear();

	char c = 0;
	// file.get() fails at end-of-file or on error, which ends the loop even
	// when the data contains no terminating '\0'
	while((max_size == -1 || (int)name.size() < max_size) && file.get(c)){
		if(c == 0){
			break;
		}
		name.push_back(c);
	}

	return !name.empty();
}
}}



*時刻 [#z0e88a31]
***hh:mm:ss形式への変換 [#v1f704c5]
0付き数値の作成に[[ちょっとした関数]]のGenZeroNoを使っている．
#code(C){{
/*!
 * Convert a number of seconds to the form hh:mm:ss.
 *  - the hour field is only written when it is greater than 0
 *  - the input is rounded to the nearest millisecond first
 *  - a negative input is formatted from its absolute value with a leading "-"
 *  - GenZeroNo is defined elsewhere; presumably it zero-pads a number to the
 *    given number of digits
 * @param[in] sec number of seconds
 * @param[in] use_msec also write the milliseconds (hh:mm:ss.msec)
 * @return string of the form hh:mm:ss
 */
inline string GenTimeString(double sec, bool use_msec = false)
{
	const bool negative = (sec < 0.0);

	// total milliseconds, converted directly to long (an intermediate int cast
	// would limit the range before the value is stored)
	long value = (long)(1000.0*(negative ? -sec : sec)+0.5);
	const bool show_sign = negative && value > 0;

	unsigned int h = (unsigned int)(value/3600000L);	// hours
	value %= 3600000L;
	unsigned int m = (unsigned int)(value/60000L);		// minutes
	value %= 60000L;
	unsigned int s = (unsigned int)(value/1000L);		// seconds
	unsigned int ms = (unsigned int)(value%1000L);		// milliseconds

	stringstream ss;
	if(show_sign) ss << "-";
	if(h > 0) ss << GenZeroNo(h, 2) << ":";
	ss << GenZeroNo(m, 2) << ":";
	ss << GenZeroNo(s, 2);
	if(use_msec) ss << "." << GenZeroNo(ms, 3);

	return ss.str();
}

/*!
 * Format a time given as hours, minutes and seconds as hh:mm:ss.
 *  - the hour field (and its ":") is only written when h is greater than 0
 *  - every field is formatted by GenZeroNo with 2 as second argument
 * @param[in] h,m,s hours, minutes, seconds
 * @return string of the form hh:mm:ss
 */
inline string GenTimeString(int h, int m, int s)
{
	stringstream text;

	const bool with_hours = (h > 0);
	if(with_hours){
		text << GenZeroNo(h, 2) << ":";
	}

	text << GenZeroNo(m, 2) << ":";
	text << GenZeroNo(s, 2);

	const string result = text.str();
	return result;
}
}}
stringによる文字列処理 の変更点

stringによる文字列処理の変更点