Perl 爬取同花顺数据

来源:互联网 发布:php ob get contents 编辑:程序博客网 时间:2024/06/02 07:36
use strict;
use warnings;
use utf8;

use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

# Scrape the table published at http://data.10jqka.com.cn/financial/yjyg/
# (Tonghuashun) and dump its rows as pipe-separated text.
#
# Steps:
#   1. Connect to MySQL (the handle is opened but not otherwise used here).
#   2. Download the listing page and save a snapshot to data.html.
#   3. Parse the snapshot, read the page title and the highest value of the
#      "page" attribute found on <a> elements.
#   4. For every page number 1..$max, append the table rows to
#      "<title>-<page>.txt", one row per line, first eight cells joined by "|".
#      Each row is also echoed to STDOUT with the cells concatenated.
#
# Dies if the database connection, the HTTP request, or any file
# open/close fails.

# --- database -------------------------------------------------------------
# NOTE(review): credentials are hard-coded and the handle is never used for
# queries in this script; consider moving them to configuration or dropping
# the connection if it is not needed.
my $user   = "root";
my $passwd = 'xxx';
my $dbh    = DBI->connect(
    "dbi:mysql:database=zjzc_vote;host=14.5.5.57;port=3306",
    $user, $passwd,
) or die "can't connect to  database " . $DBI::errstr;
$dbh->do("SET NAMES utf8");

# --- download ---------------------------------------------------------------
my $snapshot_file = "data.html";

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

my $response = $ua->get('http://data.10jqka.com.cn/financial/yjyg/');
die "fetch failed: " . $response->status_line . "\n"
    unless $response->is_success;

my $html = $response->decoded_content;
die "fetch failed: response body could not be decoded\n"
    unless defined $html;

# decoded_content yields character data, so write it through an explicit
# UTF-8 layer.  The handle must be closed before the file is parsed,
# otherwise buffered output may not have reached the disk yet.
open my $data_fh, '>:encoding(UTF-8)', $snapshot_file
    or die "open data file failed:$!";
print {$data_fh} $html;
close $data_fh
    or die "close data file failed:$!";

# --- parse ------------------------------------------------------------------
my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse_file($snapshot_file);

my $title = $tree->findvalue('/html/body//span[@class="text-value"]');
print "\$title is $title\n";

# The pager links carry a "page" attribute; the largest numeric value is
# taken as the number of pages.
my $max = 0;
for my $anchor ( $tree->find_by_tag_name('a') ) {
    my $page = $anchor->attr('page');
    next unless defined $page && $page =~ /\A[0-9]+\z/;
    $max = $page if $page > $max;
}
print "\$max is $max\n";

# --- extract rows -----------------------------------------------------------
# Collect the rows once; the parsed tree does not change between pages.
my @rows = $tree->find_by_tag_name("tr");
shift @rows;    # drop the first <tr> (presumably the header row)

my @records;
for my $row (@rows) {
    # Keep element children only; plain text nodes have no as_text method.
    my @cells = grep { ref $_ } $row->content_list;

    # Rows with fewer than eight cells cannot supply all columns; skip them
    # instead of dying on an undefined cell.
    next if @cells < 8;

    push @records, [ map { $_->as_text } @cells[ 0 .. 7 ] ];
}

# --- write output -----------------------------------------------------------
# NOTE(review): no request is made for page $m, so every "<title>-<m>.txt"
# receives the rows of the first page.  Fetching the individual pages needs
# the site's per-page URL, which is not visible in this script - TODO confirm
# and implement.
if (@records) {
    for my $m ( 1 .. $max ) {
        my $out_file = "$title-$m.txt";
        open my $out_fh, '>>', $out_file
            or die "open $out_file failed:$!";

        for my $fields (@records) {
            print @{$fields}, "\n";
            print {$out_fh} join( "|", @{$fields} ), "\n";
        }

        close $out_fh
            or die "close $out_file failed:$!";
    }
}

# HTML::TreeBuilder trees hold circular references; release them explicitly.
$tree->delete;
$dbh->disconnect;

0 0
原创粉丝点击