PHP获取百度文库解析源码

180it 2019-10-30 PM 2077℃ 0条

代码可能失效,仅供学习
<?php
/**
 * Created by PhpStorm.
 * User: Administrator
 * Date: 2019/7/21 0021
 * Time: 16:15
 */

/**
 * Generate a random application id in the 8-4-4-4-12 upper-case hex layout.
 *
 * The 32 hex digits come from a cryptographically secure source when one is
 * available (random_bytes, then openssl_random_pseudo_bytes). The legacy
 * md5(uniqid(...)) derivation is kept only as a last-resort fallback for
 * runtimes that offer neither. Unlike the previous implementation this does
 * not call mt_srand(), so the caller's global random sequence is not reset
 * to a low-entropy, time-derived seed.
 *
 * @return string e.g. "35FCB049-8B12-BB5B-69C6-3C31708B792C"
 */
function buildAppId()
{
    $hex = null;

    if (function_exists('random_bytes')) {
        try {
            $hex = bin2hex(random_bytes(16));
        } catch (Exception $e) {
            // No secure source available; fall through to the next option.
            $hex = null;
        }
    }

    if ($hex === null && function_exists('openssl_random_pseudo_bytes')) {
        $bytes = openssl_random_pseudo_bytes(16);
        if (is_string($bytes) && strlen($bytes) === 16) {
            $hex = bin2hex($bytes);
        }
    }

    if ($hex === null) {
        // Last resort only: not suitable as a secret, but keeps the function usable.
        $hex = md5(uniqid(mt_rand(), true));
    }

    $hex = strtoupper($hex);

    return implode('-', [
        substr($hex, 0, 8),
        substr($hex, 8, 4),
        substr($hex, 12, 4),
        substr($hex, 16, 4),
        substr($hex, 20, 12),
    ]);
}

// Key generation endpoint: when the query string carries build=1, respond
// with a freshly generated APPID (toJson() terminates the script).
$buildFlag = isset($_GET['build']) ? $_GET['build'] : null;
if ($buildFlag == 1) {
    toJson("APPID生成成功", 200, ['APPID' => buildAppId()]);
}

/**
 * Whitelist of authorised application ids.
 *
 * To authorise a new caller, generate an id by requesting this script with
 * build=1 appended to the address, then add the generated value to the list
 * below.
 *
 * @return string[] Authorised APPID values.
 */
function appId()
{
    return [
        '35FCB049-8B12-BB5B-69C6-3C31708B792C', // used by the CoolQ bot
        'D19DDCB1-38FA-3F40-6A29-04CF98DAC19A', // ...
        '0084DA25-0FAB-D163-099E-E9C67CF87E65', // ...
        '9CA2B554-CCDC-72CF-34BD-AE704AF74F99', // ...
    ];
}

/**
 * Compute the request signature.
 *
 * The 'sign' entry is removed, the remaining parameters are sorted by key,
 * and every key is concatenated with its value. That text is wrapped in the
 * app id on both sides, hashed with md5 twice and upper-cased.
 *
 * @param string $appId  Application id, used as the prefix and suffix of the signed text.
 * @param array  $params Request parameters (a 'sign' key, if present, is ignored).
 * @return string Upper-case hexadecimal signature.
 */
function buildSign($appId, $params)
{
    unset($params['sign']);
    ksort($params);

    $pieces = [];
    foreach ($params as $name => $value) {
        $pieces[] = $name . $value;
    }

    $signedText = $appId . implode('', $pieces) . $appId;

    return strtoupper(md5(md5($signedText)));
}

// --- Authorisation -------------------------------------------------------
// The caller must present a whitelisted appid. The membership test is strict
// and rejects non-string input (e.g. appid[]=...), so no type juggling can
// make an unknown value match.
$givenAppId = isset($_GET['appid']) ? $_GET['appid'] : '';
if (!is_string($givenAppId) || !$givenAppId || !in_array($givenAppId, appId(), true)) {
    toJson("授权失败", 400);
}

$givenSign = isset($_GET['sign']) ? $_GET['sign'] : '';
if (!is_string($givenSign) || !$givenSign) {
    toJson("缺少签名参数、请重试", 400);
}

// Verify the signature. The comparison is type-strict (constant-time where
// hash_equals() exists) so numeric-looking hashes such as "0E1234..." cannot
// compare equal through loose juggling.
$sign = buildSign($givenAppId, $_GET);
$signMatches = function_exists('hash_equals')
    ? hash_equals($sign, $givenSign)
    : $sign === $givenSign;
if (!$signMatches) {
    // The expected signature is deliberately NOT returned: echoing it back
    // would let any caller obtain a valid signature by sending a wrong one.
    toJson("签名错误,请稍候重试", 400);
}

// Cookie string sent as the "Cookie" request header of the download lookup
// further down. Empty by default — presumably a logged-in Baidu cookie has to
// be filled in here for the lookup to succeed (TODO confirm).
$cookie = '';

/**
 * Perform an HTTP request with cURL and return the raw response body.
 *
 * Changes from the earlier version: the two CURLOPT_HTTPHEADER calls that
 * set random Client_Ip / X-Forwarded-For headers were removed because the
 * later CURLOPT_HTTPHEADER call with $headers replaced them, so they were
 * never sent; callers that want such headers pass them in $headers. The
 * connect timeout is capped at the total timeout (it was 60s against a 7s
 * total, so it could never take effect).
 *
 * @param string            $url        Target URL.
 * @param string            $method     HTTP verb, case-insensitive.
 * @param array|string|null $postfields Request body. Only used when $method is POST;
 *                                      arrays are encoded with http_build_query().
 *                                      Ignored for every other method.
 * @param array             $headers    Raw "Name: value" header lines.
 * @param bool              $debug      When true, echoes the body, transfer info and response.
 * @return string|false Response body, or false when the handle cannot be created
 *                      or the transfer fails.
 */
function httpRequest($url, $method, $postfields = null, $headers = array(), $debug = false)
{
    $method = strtoupper($method);

    $ci = curl_init();
    if ($ci === false) {
        return false;
    }

    // Maximum number of seconds the whole transfer may take.
    $timeout = 7;

    /* Curl settings */
    curl_setopt($ci, CURLOPT_URL, $url);
    curl_setopt($ci, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
    curl_setopt($ci, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");
    curl_setopt($ci, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ci, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($ci, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ci, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($ci, CURLINFO_HEADER_OUT, true);

    if ($method === 'POST') {
        curl_setopt($ci, CURLOPT_POST, true);
        if (!empty($postfields)) {
            $body = is_array($postfields) ? http_build_query($postfields) : $postfields;
            curl_setopt($ci, CURLOPT_POSTFIELDS, $body);
        }
    } else {
        // Any other verb is passed through as a custom request method.
        curl_setopt($ci, CURLOPT_CUSTOMREQUEST, $method);
    }

    if (stripos($url, 'https://') === 0) {
        // NOTE(review): certificate and host verification are switched off for
        // https URLs, which leaves the transfer open to interception. Kept
        // as-is to avoid breaking hosts without a CA bundle; enable both
        // options if the environment allows it.
        curl_setopt($ci, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ci, CURLOPT_SSL_VERIFYHOST, false);
    }

    // Follow at most two redirects.
    curl_setopt($ci, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ci, CURLOPT_MAXREDIRS, 2);

    $response = curl_exec($ci);

    if ($debug) {
        echo "=====post data======\r\n";
        var_dump($postfields);
        echo "=====info===== \r\n";
        print_r(curl_getinfo($ci));
        echo "=====response=====\r\n";
        print_r($response);
    }

    curl_close($ci);

    return $response;
}

/**
 * Send a JSON response and terminate the script.
 *
 * The body is an object with the keys "msg", "code" and "data". $code is
 * only written into the body; the HTTP status line is not changed here.
 *
 * @param string $msg  Human-readable message.
 * @param int    $code Application status code placed in the body.
 * @param mixed  $data Optional payload.
 * @return void This function does not return; it ends the script via exit().
 */
function toJson($msg, $code = 200, $data = [])
{
    header('Content-Type:application/json; charset=utf-8');

    $payload = [
        'msg'  => $msg,
        'code' => $code,
        'data' => $data,
    ];

    exit(json_encode($payload));
}

/**
 * Shorten a URL through the dwz.cn "create" endpoint.
 *
 * This is best-effort: on any failure (handle creation, transfer error,
 * unparsable or unexpected response) the original long URL is returned
 * unchanged. The previous try/catch around json_decode() could never fire,
 * because json_decode() returns null on invalid input instead of throwing;
 * the decoded value is now validated explicitly before it is indexed.
 *
 * @param string $longUrl URL to shorten.
 * @return string The short URL when the service reports Code 0 with a
 *                non-empty ShortUrl, otherwise $longUrl.
 */
function baiDuDwz($longUrl)
{
    $endpoint = 'https://dwz.cn/admin/v2/create';

    // NOTE(review): API token committed to source; consider loading it from
    // configuration or the environment instead.
    $token = 'd7852c1ffabbf77670845258309ae352';

    $body = json_encode(['Url' => $longUrl, 'TermOfValidity' => 'long-term']);
    if ($body === false) {
        // The URL could not be encoded (e.g. invalid UTF-8); nothing to send.
        return $longUrl;
    }

    $headers = ['Content-Type:application/json', 'Token:' . $token];

    $curl = curl_init($endpoint);
    if ($curl === false) {
        return $longUrl;
    }

    // NOTE(review): TLS peer/host verification is disabled, as before; enable
    // both options if the environment ships a CA bundle.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($curl, CURLOPT_CUSTOMREQUEST, 'POST');
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($curl, CURLOPT_FAILONERROR, false);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_HEADER, false);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $body);

    $raw = curl_exec($curl);
    curl_close($curl);

    if (!$raw) {
        return $longUrl;
    }

    $decoded = json_decode($raw, true);
    if (!is_array($decoded) || !isset($decoded['Code'], $decoded['ShortUrl'])) {
        return $longUrl;
    }

    if ((int) $decoded['Code'] === 0 && $decoded['ShortUrl']) {
        return $decoded['ShortUrl'];
    }

    return $longUrl;
}

// Extract the document id from the page URL supplied in the "url" parameter.
$url = isset($_GET['url']) ? $_GET['url'] : '';
if (!is_string($url) || !$url) {
    toJson('url参数不正确', 400);
}

// Only the path component is inspected, so a "/" or "." inside the query
// string or fragment can no longer be mistaken for part of the file name.
$path = parse_url($url, PHP_URL_PATH);
$path = is_string($path) ? $path : '';
$lastSlash = strrpos($path, '/');
$fileName = $lastSlash === false ? $path : (string) substr($path, $lastSlash + 1);

// The id is the file name up to its first dot; a name without any dot is
// used whole (previously that case produced false and an empty doc_id).
$firstDot = strpos($fileName, '.');
$id = $firstDot === false ? $fileName : (string) substr($fileName, 0, $firstDot);
if ($id === '') {
    toJson('url参数不正确', 400);
}

$time = time();
// The id is percent-encoded so it cannot inject additional query parameters.
// NOTE(review): "t" appears twice in this query string (current time and a
// fixed value); left unchanged because it is unclear which one the server reads.
$url = 'https://study.baidu.com/api/p/doc/view/download?doc_id=' . rawurlencode($id) . '&t=' . $time . '&fastdown=undefined&t=1559828438675&fr=4';

//文档下载链接 = 网页_访问S (“https://study.baidu.com/api/p/doc/view/download?doc_id=” + 文件ID + “&fastdown=undefined&t=1559828438675&fr=4”, 0, “doc_id=” + 文件ID + “&fastdown=undefined&t=1559828438675&fr=4”, “BAIDUID=0FF2BDE0EC48DAEF1B7333B68B15EAD9:FG=1; BIDUPSID=0FF2BDE0EC48DAEF1B7333B68B15EAD9; PSTM=1559780736; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; delPer=0; BDRCVFR[tFA6N9pQGI3]=mk3SLVN4HKm; H_PS_PSSID=; PSINO=7; Hm_lvt_0785cae3f3d84faf3af8cad08727feb6=1559803237,1559803355,1559803422,1559822550; USERTOKEN=” + USERTOKEN + “; expirseData={#引号expire#引号:1577750400,#引号to_end_day#引号:207,#引号space_c”, , , , , , , , , , , , , , , )

/**
 * Build a random dotted-quad string.
 *
 * Each of the four octets is drawn independently with mt_rand(0, 255).
 *
 * @return string e.g. "12.200.7.255"
 */
function randIp()
{
    $octets = [];
    for ($i = 0; $i < 4; $i++) {
        $octets[] = mt_rand(0, 255);
    }

    return implode('.', $octets);
}

// Request headers for the upstream call.
$header = [
    'Host: study.baidu.com',
    'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36',
    'Accept: application/json,application/x-www-form-urlencoded, */*; q=0.01',
    'Referer: ' . $url,
    'Cookie: ' . $cookie,
    'CLIENT-IP: ' . randIp(),
    'X-FORWARDED-FOR: ' . randIp(),
];

// Request data. httpRequest() only sends a body for POST, so for this GET
// call the values take effect solely through the query string already in $url.
$data = [
    'doc_id' => $id,
    't' => $time,
    'fastdown' => 'undefined',
    'fr' => 4,
];

$result = httpRequest($url, 'get', $data, $header, false);
if (!$result) {
    toJson("系统繁忙,请稍后再试", 400);
}

// json_decode() returns null on malformed input rather than throwing, so the
// decoded structure is validated explicitly before any key is read.
$result = json_decode($result, true);
$succeeded = is_array($result)
    && isset($result['error'], $result['errmsg'], $result['data']['download_url'])
    && (int) $result['error'] === 0
    && $result['errmsg'] === 'success'
    && is_string($result['data']['download_url'])
    && $result['data']['download_url'] !== '';

if ($succeeded) {
    $down_url = baiDuDwz($result['data']['download_url']);
    toJson("解析成功", 200, $down_url);
} else {
    toJson("系统繁忙,请稍后再试", 400);
}

支付宝打赏 微信打赏

如果文章或资源对您有帮助,欢迎打赏作者。一路走来,感谢有您!

标签: none

PHP获取百度文库解析源码