The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

NAME

Novel::Robot::Parser - 小说站点解析引擎

INIT

site 支持的小说站点名称

晋江:Jjwxc

豆豆:Dddbbb

努努:Nunu

书农:Shunong

爱尚:Asxs

落秋:Luoqiu

顶点:Dingdian

new

初始化解析模块

   my $url = 'http://www.jjwxc.net/onebook.php?novelid=2456';

   #直接指定站点
   my $parser = Novel::Robot::Parser->new( site => 'Jjwxc' );
    
   #或者通过url自动检测站点(与上面直接指定站点的方式二选一)
   my $parser = Novel::Robot::Parser->new( site => $url );

   $parser->get_index_ref($url);

INDEX FUNCTION

get_index_ref 获取目录页信息

    my $index_ref = $parser->get_index_ref($index_url, %opt);

parse_index 解析目录页

   my $index_ref = $parser->parse_index($index_html_ref);

update_chapter_id 更新章节id

  $parser->update_chapter_id($index_ref);

update_chapter_num 更新章节数

  $parser->update_chapter_num($index_ref);

CHAPTER FUNCTION

get_chapter_ref 获取章节页信息

    my $chapter_url = 'http://www.jjwxc.net/onebook.php?novelid=2456&chapterid=2';
    my $chapter_ref = $parser->get_chapter_ref($chapter_url, 2);

parse_chapter 解析章节页

   my $chapter_ref = $parser->parse_chapter($chapter_html_ref);

WRITER FUNCTION

get_writer_ref 获取作者页信息

    my $writer_url = 'http://www.jjwxc.net/oneauthor.php?authorid=3243';
    my $writer_ref = $parser->get_writer_ref($writer_url);

parse_writer 解析作者页

   my $writer_ref = $parser->parse_writer($writer_html_ref);

QUERY FUNCTION

get_query_ref 获取查询结果

    my $query_type = '作者';
    my $query_value = '顾漫';
    my $query_ref = $parser->get_query_ref($query_type, $query_value);

make_query_request 根据查询类型和关键字构造查询请求(返回查询url及post数据)

  #查询类型:  $type
  #查询关键字:$keyword
  my ($query_url, $post_data) = 
        $parser->make_query_request( $type, $keyword );

parse_query 解析查询结果

  my $query_ref = $parser->parse_query($query_html_ref); 

parse_query_result_urls 解析查询结果中的分页url

  my $query_urls_ref = $parser->parse_query_result_urls($query_html_ref);

OTHER FUNCTION

get_inner_html 获取html元素的innerHTML

  my $inner_html = $parser->get_inner_html($element);

format_abs_url 批量将url转换成绝对路径

  $parser->format_abs_url($index_ref->{chapter_info}, $index_ref->{index_url});
  $parser->format_abs_url($index_ref->{more_book_info}, $index_ref->{index_url});
  $parser->format_abs_url($query_urls_ref, $query_url);