字符串分割的问题

来源：互联网　发布：电脑端口怎么设置　编辑：程序博客网　时间：2024/06/10 06:02

一、用strtok函数进行字符串分割

原型： char *strtok(char *str, const char *delim);

功能：分解字符串为一组字符串。

参数说明：str为要分解的字符串，delim为分隔符字符串。

返回值：返回指向第一段被截取出来的字符串的指针(根据 delim 内的字符进行截取). 如果没有找到, 那么就返回 NULL.

调用说明:

(1). 第一次调用 strtok 时, 第一个参数是待分解的字符串 str, 以后再调用时, 第一个参数必须是 NULL;

(2). 调用 strtok 后, 原字符串会被修改;

(3). strtok 不是一个线程安全的函数.
源代码分析:
从这里贴出的 Linux 下的 string.h 的实现可以看出:

(1). strtok 使用一个全局定义的 char * ___strtok; 来指向截取后的剩余字符串, 所以从第二次开始调用 strtok 后, 第一个参数就只需要设为 NULL 就可以了;

(2). strtok 函数会在一开始就判断其第一个参数, 若不为 NULL, 就取其第一个参数进行截取, 并将 ___strtok 指向截取后的字符串; 若为NULL, 则使用 ___strtok 进行截取, 并将 ___strtok 指向截取后的字符串;

(3). strtok 使用 sbegin 指向待截取的字符串, 当找到 delim 里的字符时, 就把这个字符替换为 '\0', 这样就相当于从 sbegin 指向的字符串里截取出前面一段了. 而后面一段, 则由 ___strtok 指向, 等待下一次截取.

示例:

/* Demonstrates a split built on strtok: prints every token on its own line. */
#include <string.h>
#include <stdio.h>

int main()
{
    char text[] = "Golden Global      View,disk * desk";
    const char *separators = " ,*";
    char *token;

    /*
     * The first call hands strtok the buffer to scan; each later call passes
     * NULL so that scanning resumes where the previous call stopped.
     */
    for (token = strtok(text, separators); token != NULL; token = strtok(NULL, separators))
    {
        printf("%s\n", token);
    }

    return 0;
}

二、用STL进行字符串的分割

涉及到string类的两个函数find和substr：
1、find函数
原型：size_t find ( const string& str, size_t pos = 0 ) const;
功能：查找子字符串第一次出现的位置。
参数说明：str为子字符串，pos为初始查找位置。
返回值：找到的话返回第一次出现的位置，否则返回string::npos

2、substr函数
原型：string substr ( size_t pos = 0, size_t n = npos ) const;
功能：获得子字符串。
参数说明：pos为起始位置（默认为0），n为要截取的字符个数（默认为npos，表示一直取到字符串末尾）
返回值：子字符串

实现如下：

#include <iostream>
#include <string>
#include <vector>

// Splits `str` on every occurrence of the separator `pattern`.
//
// Parameters:
//   str     - the text to split.
//   pattern - the separator; may be longer than one character.
//
// Returns the pieces in order. Adjacent separators and a leading or trailing
// separator produce empty pieces, and an empty `str` produces one empty piece.
// An empty `pattern` cannot separate anything, so the whole input is returned
// as a single piece.
std::vector<std::string> split(std::string str, std::string pattern)
{
    std::vector<std::string> result;

    // An empty separator matches at every position without consuming any
    // input, so the scan below would never advance.
    if (pattern.empty())
    {
        result.push_back(str);
        return result;
    }

    // Search the original text only. Appending the separator to the input
    // would create matches that straddle the seam (e.g. "aX" split by "XX").
    std::string::size_type start = 0;
    std::string::size_type pos = str.find(pattern, start);
    while (pos != std::string::npos)
    {
        result.push_back(str.substr(start, pos - start));
        start = pos + pattern.size();
        pos = str.find(pattern, start);
    }

    // Whatever follows the last separator (possibly nothing) is the last piece.
    result.push_back(str.substr(start));
    return result;
}

int main()
{
    std::string str;
    std::cout << "Please input str:" << std::endl;
    // getline rather than operator>> so that the input may contain spaces.
    getline(std::cin, str);

    std::string pattern;
    std::cout << "Please input pattern:" << std::endl;
    getline(std::cin, pattern);

    std::vector<std::string> result = split(str, pattern);
    std::cout << "The result:" << std::endl;
    for (std::vector<std::string>::size_type i = 0; i < result.size(); i++)
    {
        std::cout << result[i] << std::endl;
    }

    // Wait for input so that a console window stays open.
    std::cin.get();
    std::cin.get();
    return 0;
}

三、用Boost进行字符串的分割

用boost库的正则表达式实现字符串分割
实现如下：

//本程序实现的是利用正则表达式对字符串实现分割//运行环境      VC6.0 + boost 库#include <iostream>#include <cassert>#include <vector>#include <string>#include "boost/regex.hpp"std::vector<std::string> split(std::string str,std::string s){        boost::regex reg(s.c_str());        std::vector<std::string> vec;        boost::sregex_token_iterator it(str.begin(),str.end(),reg,-1);        boost::sregex_token_iterator end;        while(it!=end)        {                vec.push_back(*it++);        }        return vec;}int main(){        std::string str,s;        str="sss/ddd/ggg/hh";        s="/";        std::vector<std::string> vec=split(str,s);        for(int i=0,size=vec.size();i<size;i++)        {                std::cout<<vec[i]<<std::endl;        }        std::cin.get();        std::cin.get();        return 0;}

boost里面有自带的split的函数，如果用boost的话，还是直接用split的好，这里就不多说了，代码如下：

#include <iostream>#include <string>#include <vector>#include <boost/algorithm/string/classification.hpp>#include <boost/algorithm/string/split.hpp> using namespace std; int main(){  string s = "sss/ddd,ggg";  vector<string> vStr;  boost::split( vStr, s, boost::is_any_of( ",/" ), boost::token_compress_on );  for( vector<string>::iterator it = vStr.begin(); it != vStr.end(); ++ it )    cout << *it << endl;  return 0;}

四、以下是c实现的字符串分割

/* Frees the first `count` tokens of a partially built list, then the list. */
static void split_discard(char **tokens, int count)
{
    int i;

    for(i = 0; i < count; i++)
        free(tokens[i]);
    free(tokens);
}

/* Returns a NUL-terminated heap copy of `len` bytes at `start`, or NULL. */
static char *split_copy(const char *start, int len)
{
    char *tok = (char *) malloc((size_t) len + 1);

    if(tok != NULL)
    {
        memcpy(tok, start, (size_t) len);
        tok[len] = 0;
    }
    return tok;
}

/*
 *  Function: split()
 *
 *  Purpose: Splits a string into heap-allocated tokens.  The token text is
 *           copied, but trailing whitespace of `str` is overwritten with
 *           '\0' in place, so `str` must be writable.
 *
 *  Parameters:
 *      char *str => the string to be split
 *      char *sep => a string of token separator characters
 *      int max_strs => the maximum number of tokens to return; once that
 *                      many are needed, the rest of the string (leading
 *                      whitespace skipped) becomes the last token
 *      int *toks => place to store the number of tokens found in str
 *      char meta => the "escape metacharacter": a separator that directly
 *                   follows this character is treated as a literal
 *
 *  Returns:
 *      An array of max_strs string pointers, of which the first *toks are
 *      set.  Release it with split_free().  Returns NULL (with *toks == 0
 *      when toks is non-NULL) if toks, str or sep is NULL, if max_strs is
 *      not positive, or if memory runs out; nothing is leaked in that case.
 *
 *  Note: a string that ends with a separator yields a final empty token.
 */
static char **split(char *str, char *sep, int max_strs, int *toks, char meta)
{
    char **retstr;      /* 2D array which is returned to caller */
    char *idx;          /* index pointer into str */
    char *end;          /* ptr to end of str */
    char *sep_end;      /* ptr to end of separator string */
    char *sep_idx;      /* index ptr into separator string */
    int len = 0;        /* length of current token string */
    int curr_str = 0;   /* current index into the 2D return array */
    char last_char = (char) 0xFF;

    if(!toks) return NULL;

    *toks = 0;

    /* a non-positive max_strs leaves no room for even one token */
    if(!str || !sep || max_strs <= 0) return NULL;

    /*
     * find the ends of the respective passed strings so our while() loops
     * know where to stop
     */
    sep_end = sep + strlen(sep);
    end = str + strlen(str);

    /*
     * remove trailing whitespace; the bounds test comes first so that an
     * empty string never reads the byte in front of the buffer
     */
    while((end > str) && isspace((unsigned char) *(end - 1)))
        *(--end) = '\0';

    /* set our indexing pointers */
    sep_idx = sep;
    idx = str;

    /*
     * alloc space for the return string, this is where the pointers to the
     * tokens will be stored; zero-filled so unused slots are NULL
     */
    if((retstr = (char **) calloc((size_t) max_strs, sizeof(char *))) == NULL)
        return NULL;

    /* from here on max_strs is the index of the last usable slot */
    max_strs--;

    /*
     * only one token requested: the whole string is that token.  Letting
     * the loop below handle this case would store a token in slot 0 and
     * then the remainder in slot 1, which does not exist.
     */
    if(max_strs == 0)
    {
        if(idx < end)
        {
            if((retstr[0] = split_copy(idx, (int) (end - idx))) == NULL)
            {
                split_discard(retstr, 0);
                return NULL;
            }
            *toks = 1;
        }
        return retstr;
    }

    /* loop thru each letter in the string being tokenized */
    while(idx < end)
    {
        /* loop thru each separator string char */
        while(sep_idx < sep_end)
        {
            /*
             * if the current string-indexed char matches the current
             * separator char (and is not escaped)...
             */
            if((*idx == *sep_idx) && (last_char != meta))
            {
                /* if there's something to store... */
                if(len > 0)
                {
                    if(curr_str <= max_strs)
                    {
                        /* copy the token into the return string array */
                        if((retstr[curr_str] = split_copy(idx - len, len)) == NULL)
                        {
                            split_discard(retstr, curr_str);
                            return NULL;
                        }

                        /* twiddle the necessary pointers and vars */
                        len = 0;
                        curr_str++;

                        last_char = *idx;
                        idx++;
                    }

                    /*
                     * if only the last slot is left, the rest of the
                     * string goes into it and the list is returned
                     */
                    if(curr_str >= max_strs)
                    {
                        while(isspace((unsigned char) *idx))
                            idx++;

                        len = (int) (end - idx);
                        fflush(stdout);

                        if((retstr[curr_str] = split_copy(idx, len)) == NULL)
                        {
                            split_discard(retstr, curr_str);
                            return NULL;
                        }

                        *toks = curr_str + 1;

                        return retstr;
                    }
                }
                else
                {
                    /*
                     * otherwise, the previous char was a separator as well,
                     * and we should just continue
                     */
                    last_char = *idx;
                    idx++;
                    /* make sure to reset this so we test all the sep. chars */
                    sep_idx = sep;
                    len = 0;
                }
            }
            else
            {
                /* go to the next separator */
                sep_idx++;
            }
        }

        sep_idx = sep;
        len++;
        last_char = *idx;
        idx++;
    }

    /* put the last string into the list */
    if(len > 0)
    {
        if((retstr[curr_str] = split_copy(idx - len, len)) == NULL)
        {
            split_discard(retstr, curr_str);
            return NULL;
        }

        *toks = curr_str + 1;
    }

    /* return the token list */
    return retstr;
}

/****************************************************************
 *
 * Free the buffer allocated by split().
 *
 * char** toks = NULL;
 * int num_toks = 0;
 * toks = split(str, " ", 2, &num_toks, 0);
 * split_free(&toks, num_toks);
 *
 * At this point, toks is again NULL.
 *
 ****************************************************************/
static void split_free(char ***pbuf, int num_toks)
{
    int i;
    char** buf;  /* array of string pointers */

    if( pbuf==NULL || *pbuf==NULL )
    {
        return;
    }

    buf = *pbuf;

    for( i=0; i<num_toks; i++ )
    {
        if( buf[i] != NULL )
        {
            free( buf[i] );
            buf[i] = NULL;
        }
    }

    free(buf);
    *pbuf = NULL;
}

/* gcc split.c -o split */
/*
 * Environment: Linux or Windows
 * Build:       gcc or VC++ 6.0
 * Run:         ./split
 * ----------------------------------------------------------------------
 * Splits a string on a set of delimiter characters.
 * ----------------------------------------------------------------------
 */
#include<stdlib.h>
#include<stdio.h>
#include<string.h>

/*
 * ----------------------------------------------------------------------
 * Splits `src` on any of the characters in `delim`.
 * @param src:   the string to split (left unmodified; a copy is tokenized)
 * @param delim: the delimiter characters
 * @return NULL if memory cannot be allocated; otherwise an array of
 *         heap-allocated strings terminated by an empty string "".
 *         When src or delim is NULL, or src holds no token, the array
 *         contains only that terminating empty string.
 *         The caller frees every string (including the terminator) and
 *         then the array itself.
 * Tokenizing is done with strtok, so the call is not reentrant.
 * ----------------------------------------------------------------------
 */
char **split(const char *src,const char *delim);

int main()
{
  char str[]=" ab cd e fghi kn ";
  const char *delim=" "; /* delimiter */
  char **result=NULL;    /* array of token strings */
  char **p=NULL;

  result=split(str,delim);
  if(result==NULL){
    return 1;
  }
  p=result;
  while((*p)[0]!='\0'){ /* the empty string marks the end of the list */
    printf("[%s]\n",*p);
    free(*p); /* release the token */
    p++;
  }
  free(*p); /* release the terminating empty string */
  free(result);
  return 0;
}

/* Frees the first `count` strings of `tokens`, then the array itself. */
static void free_tokens(char **tokens,size_t count)
{
  size_t i;
  for(i=0;i<count;i++){
    free(tokens[i]);
  }
  free(tokens);
}

char **split(const char *src,const char *delim)
{
  char **result;
  char **ptmp=NULL;   /* receives the address of the enlarged array */
  char *src_copy;
  char *p;
  size_t count=0;     /* number of tokens stored so far */

  /* One slot is always needed for the terminating empty string. */
  result=(char**)malloc(sizeof(char*));
  if(result==NULL){
    return NULL;
  }

  if(src!=NULL&&delim!=NULL){
    /* strtok writes into its argument, so tokenize a private copy. */
    src_copy=strdup(src);
    if(src_copy==NULL){
      free(result);
      return NULL;
    }

    for(p=strtok(src_copy,delim);p!=NULL;p=strtok(NULL,delim)){
      if(*p=='\0'){
        continue;
      }
      /* Grow to hold this token plus the terminator. */
      ptmp=(char**)realloc(result,(count+2)*sizeof(char*));
      if(ptmp==NULL){
        printf("cannot realloc\n");
        /* Release only the slots that were actually filled. */
        free_tokens(result,count);
        free(src_copy);
        return NULL;
      }
      result=ptmp;
      result[count]=strdup(p);
      if(result[count]==NULL){
        free_tokens(result,count);
        free(src_copy);
        return NULL;
      }
      count++;
    }

    /* release the private copy */
    free(src_copy);
  }

  /* terminate the list with an empty string */
  result[count]=strdup("");
  if(result[count]==NULL){
    free_tokens(result,count);
    return NULL;
  }
  return result;
}

3. 分割以逗号分隔的字符串（字段可以用双引号括起来，即 CSV 格式）

/* *function 分割.csv格式的文本文件中的字符串，即用","和",""分开的字符串 *@param str 要分割的字符串 *@param  arr 存储分割之后的各个字符串 *@param  siz 把str分割成子字符串的个数 */int csvsplit(char *str, char **arr, int siz){    char *ptr, *end, *left, *right;    int num;    ptr = str;    num = 0;    while ( ( *ptr != '"' && NULL != (end = strchr(ptr, ','))   )         || ( *ptr == '"' && NULL != (end = strstr(ptr, "\",")) ) ) {        if (*ptr == '"')            end++;        *end = '\0';        arr[num++] = ptr;        ptr = end + 1;        if (num >= siz)            return num;    }    arr[num++] = ptr;    return num;}

/*
 * [C] String handling - split a string on a given separator string
 * (the separator and the text may contain multi-byte characters such as
 * Chinese).
 * 2008-11-8:  StringSplit - splits into an array of strings whose slot 0
 *             holds the number of tokens.
 * 2008-11-10: StringSplit_Struct - the same operation, returning a struct.
 */
/* C code follows */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* Returns a NUL-terminated heap copy of `len` bytes at `begin`, or NULL. */
static char* StringSplit_dup(const char* begin, size_t len)
{
     char* copy = (char *)malloc(len + 1);
     if (copy != NULL)
     {
         memcpy(copy, begin, len);
         copy[len] = '\0';
     }
     return copy;
}

/* Frees items[first] .. items[last - 1], then the array itself. */
static void StringSplit_release(char** items, int first, int last)
{
     int i;
     for (i = first; i < last; i++)
     {
         free(items[i]);
     }
     free(items);
}

/* Variant 1 */
/*
 * Splits `string` on every occurrence of `split`.
 *
 * Returns a heap array in which slot 0 holds the token count (an integer
 * stored in the pointer; read it with (int)(intptr_t)result[0]) and slots
 * 1..count hold heap copies of the tokens, followed by a NULL slot.
 * An empty or NULL `string` gives a count of 0.  A token that would follow
 * a trailing separator is not reported.  A NULL or empty `split` cannot
 * separate anything, so the whole string is returned as one token.
 * Returns NULL if memory cannot be allocated.
 * The caller frees slots 1..count and then the array.
 */
char** StringSplit(const char* string,const char* split)
{
     char** result;
     char** grown;
     const char* p;      /* start of the token being scanned */
     const char* pos;    /* next occurrence of the separator, or NULL */
     size_t sep_len;
     size_t tok_len;
     int count = 1;      /* slot that the next token will occupy */

     /* Slot 0 is the count, slot 1 the NULL terminator of an empty list. */
     result = (char * * )malloc(sizeof(char *)*2);
     if (result == NULL)
     {
         return NULL;
     }
     result[0] = NULL;
     result[1] = NULL;
     if (string == NULL)
     {
         return result;
     }

     sep_len = (split != NULL) ? strlen(split) : 0;
     p = string;
     while(*p != '\0')
     {
         /* An empty separator matches everywhere and would never advance. */
         pos = (sep_len > 0) ? strstr(p,split) : NULL;
         tok_len = (pos != NULL) ? (size_t)(pos - p) : strlen(p);

         /* Room for slots 0..count plus the NULL terminator. */
         grown = (char * * )realloc(result,sizeof(char *)*((size_t)count+2));
         if (grown == NULL)
         {
             StringSplit_release(result, 1, count);
             return NULL;
         }
         result = grown;

         /* Every token is a heap copy, so all of them are freed alike. */
         result[count] = StringSplit_dup(p, tok_len);
         if (result[count] == NULL)
         {
             StringSplit_release(result, 1, count);
             return NULL;
         }
         result[count+1] = NULL;
         result[0] = (char *)(intptr_t)count;

         if (pos == NULL)
         {
             /* No separator left: that was the last token. */
             return result;
         }
         count++;
         /* Continue right after the separator. */
         p = pos + sep_len;
     }
     return result;
}

/* Variant 2 */
/* Result structure; the tokens are stored from index 0. */
typedef struct{
     int number;        /* index of the last token: string[0..number] are valid */
     char** string;     /* array of heap-allocated tokens */
}StringTab;

/*
 * Splits `string` on every occurrence of `split`.
 *
 * On return result.string[0..result.number] are heap copies of the tokens,
 * so callers iterate with i <= result.number.  Empty input yields a single
 * empty token, and input that ends with the separator yields a final empty
 * token, so string[number] is always valid.  A NULL or empty `split` gives
 * the whole string as one token.  If memory cannot be allocated,
 * result.string is NULL and result.number is 0.
 * The caller frees each token and then result.string.
 */
StringTab StringSplit_Struct(const char* string,const char* split)
{
     StringTab result;
     char** grown;
     const char* p;
     const char* pos;
     size_t sep_len;
     size_t tok_len;
     int stored = 0;     /* tokens stored so far */

     result.number = 0;
     result.string = NULL;

     sep_len = (split != NULL) ? strlen(split) : 0;
     p = (string != NULL) ? string : "";
     for (;;)
     {
        pos = (sep_len > 0) ? strstr(p,split) : NULL;
        tok_len = (pos != NULL) ? (size_t)(pos - p) : strlen(p);

        grown = (char * * )realloc(result.string,sizeof(char *)*((size_t)stored+1));
        if (grown == NULL)
        {
          if (result.string != NULL)
          {
            StringSplit_release(result.string, 0, stored);
          }
          result.string = NULL;
          result.number = 0;
          return result;
        }
        result.string = grown;

        result.string[stored] = StringSplit_dup(p, tok_len);
        if (result.string[stored] == NULL)
        {
          StringSplit_release(result.string, 0, stored);
          result.string = NULL;
          result.number = 0;
          return result;
        }

        if (pos == NULL)
        {
          /* Last token: report its index. */
          result.number = stored;
          return result;
        }
        stored++;
        p = pos + sep_len;
     }
}

/* Runs variant 1 on one input, prints the tokens and frees the result. */
static void StringSplit_demo(const char* text, const char* sep)
{
     char** array = StringSplit(text, sep);
     int total;
     int i;

     if (array == NULL)
     {
         return;
     }
     total = (int)(intptr_t)array[0];
     for(i=1;i<=total;i++)
     {
         printf("Num:%d I:%d: Value: %s\n",total,i,array[i]);
     }
     StringSplit_release(array, 1, total + 1);
}

/* Runs variant 2 on one input, prints the tokens and frees the result. */
static void StringSplit_Struct_demo(const char* text, const char* sep)
{
     StringTab array2 = StringSplit_Struct(text, sep);
     int i;

     if (array2.string == NULL)
     {
         return;
     }
     for(i=0;i<=array2.number;i++)
     {
        printf("Num:%d I:%d: Value: %s\n",array2.number,i,array2.string[i]);
     }
     StringSplit_release(array2.string, 0, array2.number + 1);
}

int main()
{
     /* Tests */
     /* Variant 1 */
     StringSplit_demo("a/aaa//哈aa","aaa");
     StringSplit_demo("a/aa哈a//哈aa","哈");

     /* Variant 2 */
     StringSplit_Struct_demo("a/aaa//哈aa","aaa");
     StringSplit_Struct_demo("a/aa哈a//哈aa","哈");

     return 0;
}