百度从2015年起不再在referer中携带搜索关键词(referer里只保留eqid参数),因此无法再直接从referer获取百度搜索关键词,只能拿eqid通过百度的Referer API接口查询。
百度referer api接口地址:https://cloud.baidu.com/doc/RefererAPI/s/hjwvycn07
下面是php对接百度referer api接口的方法:
直接复制下面代码就可以使用,只需要把其中的accessKey、secretKey改成你自己的,并把$eqid换成从来源页referrer中取到的eqid值。
<?php
// Per RFC 3986, every byte must be percent-encoded except:
//   1. ASCII letters (upper and lower case)
//   2. decimal digits
//   3. dot '.', tilde '~', hyphen '-' and underscore '_'
global $PERCENT_ENCODED_STRINGS;
__init();

// Fills the global $PERCENT_ENCODED_STRINGS table:
// index = byte value (0..255), value = that byte's URL-encoded form.
function __init()
{
    global $PERCENT_ENCODED_STRINGS;

    // Collect the characters that are emitted unchanged:
    // a-z, A-Z, 0-9 and the four marks '-', '.', '_', '~'.
    $passThrough = array();
    $unreserved = array_merge(
        range('a', 'z'),
        range('A', 'Z'),
        range('0', '9'),
        array('-', '.', '_', '~')
    );
    foreach ($unreserved as $symbol) {
        $passThrough[ord($symbol)] = $symbol;
    }

    // Every other byte value maps to its upper-case "%XX" escape.
    for ($code = 0; $code < 256; ++$code) {
        $PERCENT_ENCODED_STRINGS[$code] = isset($passThrough[$code])
            ? $passThrough[$code]
            : sprintf("%%%02X", $code);
    }
}
/**
 * Percent-encodes a string per RFC 3986.
 *
 * Every byte is replaced by its upper-case "%XX" escape, except ASCII
 * letters, digits and the characters '-', '.', '_' and '~', which pass
 * through unchanged.
 *
 * Delegates to rawurlencode(), which implements exactly this rule, so the
 * result no longer depends on a global lookup table having been filled
 * before the first call.
 *
 * @param string $value raw string to encode
 * @return string the encoded string ('' for an empty input)
 */
function doUrlEncode($value)
{
    return rawurlencode((string) $value);
}
/**
 * Percent-encodes a path with doUrlEncode() and then restores every
 * encoded slash ("%2F") to a literal '/'.
 *
 * @param string $path path to encode
 * @return string encoded path with '/' left readable
 */
function urlEncodeExceptSlash($path)
{
    $encoded = doUrlEncode($path);

    // Splitting on the escape and re-joining with '/' swaps each "%2F" back.
    return implode("/", explode("%2F", $encoded));
}
/**
 * Builds the canonical URI path used in the signed request.
 *
 * A missing path becomes '/'. Any other path is percent-encoded with
 * urlEncodeExceptSlash() and given a leading '/' if it does not have one.
 *
 * @param string|null $path path component of the request URL
 * @return string canonical path, always starting with '/'
 */
function getCanonicalURIPath($path)
{
    // Only a genuinely absent path collapses to '/'. empty() is not used
    // because it also treats the one-character path "0" as empty.
    if ($path === null || $path === false || $path === '') {
        return '/';
    }

    $path = (string) $path;
    $encoded = urlEncodeExceptSlash($path);

    // Every canonical URI must start with '/'.
    return $path[0] === '/' ? $encoded : '/' . $encoded;
}
/**
 * Builds the authorization string for a request.
 *
 * Steps:
 *   1. Build the prefix "bce-auth-v1/{accessKey}/{utcTimeStr}/1800".
 *   2. Build the canonical request (method, canonical path, empty query
 *      string, "host:{host}") with "host" as the only signed header.
 *   3. Derive the signing key: HMAC-SHA256 of the prefix keyed by $secretKey.
 *   4. Sign the canonical request with the signing key and append the signed
 *      header list and the signature to the prefix.
 *
 * NOTE(review): the canonical query string is always empty, so any query
 * parameters present in $url are not covered by the signature.
 *
 * @param string $accessKey  access key (AK)
 * @param string $secretKey  secret key (SK)
 * @param string $utcTimeStr request timestamp string placed in the prefix
 * @param string $url        full request URL; host and path are taken from it
 * @param string $httpMethod HTTP verb, placed verbatim in the canonical request
 * @return string "{prefix}/host/{signature}", signature as lower-case hex
 * @throws \Exception with message 'url error' when $url cannot be parsed
 */
function getAuth($accessKey, $secretKey, $utcTimeStr, $url, $httpMethod)
{
    // parse_url() signals a seriously malformed URL by returning false
    // (never null), so test for "not an array" to actually catch it.
    $urlParseRet = parse_url($url);
    if (!is_array($urlParseRet)) {
        throw new \Exception('url error');
    }
    $host = isset($urlParseRet['host']) ? $urlParseRet['host'] : '';
    $path = isset($urlParseRet['path']) ? $urlParseRet['path'] : '';
    $version = "1";
    $expirationSeconds = "1800";
    $signatureHeaders = "host";
    // Prefix string: version, access key, timestamp and validity period.
    $authString = sprintf("bce-auth-v%s/%s/%s/%s", $version, $accessKey, $utcTimeStr, $expirationSeconds);
    // Derive the signing key from the secret key and the prefix string.
    $signingKey = hash_hmac('sha256', $authString, $secretKey);
    // Canonical URI path.
    $canonicalURI = getCanonicalURIPath($path);
    // Canonical query string (always empty here).
    $canonicalQueryString = '';
    // Canonical headers: only "host" is signed.
    $canonicalHeader = $signatureHeaders . ':' . $host;
    // Assemble the canonical request.
    $canonicalRequest = "$httpMethod\n$canonicalURI\n" . "$canonicalQueryString\n$canonicalHeader";
    // Sign the canonical request with the derived key.
    $signature = hash_hmac('sha256', $canonicalRequest, $signingKey);
    // Final authorization string.
    return "$authString/$signatureHeaders/$signature";
}
$accessKey = 'XXXXX'; // access key (AK) - replace with your own
$secretKey = 'XXXXX'; // secret key (SK) - replace with your own
$eqid = "8dc269c900038be6000000055fa9eb6f"; // eqid value taken from document.referrer
$url = "http://referer.bj.baidubce.com/v1/eqid/{$eqid}";
$httpMethod = 'GET';

// Timestamp for the request. It must be expressed in UTC.
$timestamp = new \DateTime();
$timestamp->setTimezone(new \DateTimeZone("UTC"));
$utcTimeStr = $timestamp->format("Y-m-d\TH:i:s\Z");

// Step 1: build the authorization string.
$auth = getAuth($accessKey, $secretKey, $utcTimeStr, $url, $httpMethod);

// Step 2: build the request headers.
// Note: cURL expects a list of "name: value" strings, not a key => value map.
// Accept-Encoding is deliberately NOT listed here: it is set through
// CURLOPT_ENCODING below so that cURL also decompresses the response.
$head = array(
    "host: referer.bj.baidubce.com", // optional
    "content-type: application/json",
    "authorization:{$auth}",
    "x-bce-date:{$utcTimeStr}",
    "accept: */*", // optional
);

// Step 3: send the request and read the response.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_HTTPHEADER, $head);
// Send the same verb that was signed.
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $httpMethod);
// Sends "Accept-Encoding: gzip, deflate" and transparently decodes the body;
// a hand-written header would leave a compressed body undecoded.
curl_setopt($curl, CURLOPT_ENCODING, 'gzip, deflate');
// Bound the wait so an unreachable endpoint cannot hang the page.
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($curl, CURLOPT_TIMEOUT, 10);
curl_setopt($curl, CURLINFO_HEADER_OUT, 1);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$response = curl_exec($curl);
$request = curl_getinfo($curl, CURLINFO_HEADER_OUT);
$status = curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl);

// Uncomment for debugging:
// print("request: {$request}\n");
// print("status: {$status}\n");
// print("response: {$response}\n");

// Output the decoded keyword, or an empty string when the transfer failed,
// the status is not 200, or the body has no string "wd" field.
$keywords = '';
if ($response !== false && (int) $status === 200) {
    $arr = json_decode($response, true);
    if (is_array($arr) && isset($arr['wd']) && is_string($arr['wd'])) {
        $keywords = urldecode($arr['wd']);
    }
}
echo $keywords;
exit;
?>
// Per RFC 3986, every byte must be percent-encoded except:
//   1. ASCII letters (upper and lower case)
//   2. decimal digits
//   3. dot '.', tilde '~', hyphen '-' and underscore '_'
global $PERCENT_ENCODED_STRINGS;
__init();

// Fills the global $PERCENT_ENCODED_STRINGS table:
// index = byte value (0..255), value = that byte's URL-encoded form.
function __init()
{
    global $PERCENT_ENCODED_STRINGS;

    // Collect the characters that are emitted unchanged:
    // a-z, A-Z, 0-9 and the four marks '-', '.', '_', '~'.
    $passThrough = array();
    $unreserved = array_merge(
        range('a', 'z'),
        range('A', 'Z'),
        range('0', '9'),
        array('-', '.', '_', '~')
    );
    foreach ($unreserved as $symbol) {
        $passThrough[ord($symbol)] = $symbol;
    }

    // Every other byte value maps to its upper-case "%XX" escape.
    for ($code = 0; $code < 256; ++$code) {
        $PERCENT_ENCODED_STRINGS[$code] = isset($passThrough[$code])
            ? $passThrough[$code]
            : sprintf("%%%02X", $code);
    }
}
/**
 * Percent-encodes a string per RFC 3986.
 *
 * Every byte is replaced by its upper-case "%XX" escape, except ASCII
 * letters, digits and the characters '-', '.', '_' and '~', which pass
 * through unchanged.
 *
 * Delegates to rawurlencode(), which implements exactly this rule, so the
 * result no longer depends on a global lookup table having been filled
 * before the first call.
 *
 * @param string $value raw string to encode
 * @return string the encoded string ('' for an empty input)
 */
function doUrlEncode($value)
{
    return rawurlencode((string) $value);
}
/**
 * Percent-encodes a path with doUrlEncode() and then restores every
 * encoded slash ("%2F") to a literal '/'.
 *
 * @param string $path path to encode
 * @return string encoded path with '/' left readable
 */
function urlEncodeExceptSlash($path)
{
    $encoded = doUrlEncode($path);

    // Splitting on the escape and re-joining with '/' swaps each "%2F" back.
    return implode("/", explode("%2F", $encoded));
}
/**
 * Builds the canonical URI path used in the signed request.
 *
 * A missing path becomes '/'. Any other path is percent-encoded with
 * urlEncodeExceptSlash() and given a leading '/' if it does not have one.
 *
 * @param string|null $path path component of the request URL
 * @return string canonical path, always starting with '/'
 */
function getCanonicalURIPath($path)
{
    // Only a genuinely absent path collapses to '/'. empty() is not used
    // because it also treats the one-character path "0" as empty.
    if ($path === null || $path === false || $path === '') {
        return '/';
    }

    $path = (string) $path;
    $encoded = urlEncodeExceptSlash($path);

    // Every canonical URI must start with '/'.
    return $path[0] === '/' ? $encoded : '/' . $encoded;
}
/**
 * Builds the authorization string for a request.
 *
 * Steps:
 *   1. Build the prefix "bce-auth-v1/{accessKey}/{utcTimeStr}/1800".
 *   2. Build the canonical request (method, canonical path, empty query
 *      string, "host:{host}") with "host" as the only signed header.
 *   3. Derive the signing key: HMAC-SHA256 of the prefix keyed by $secretKey.
 *   4. Sign the canonical request with the signing key and append the signed
 *      header list and the signature to the prefix.
 *
 * NOTE(review): the canonical query string is always empty, so any query
 * parameters present in $url are not covered by the signature.
 *
 * @param string $accessKey  access key (AK)
 * @param string $secretKey  secret key (SK)
 * @param string $utcTimeStr request timestamp string placed in the prefix
 * @param string $url        full request URL; host and path are taken from it
 * @param string $httpMethod HTTP verb, placed verbatim in the canonical request
 * @return string "{prefix}/host/{signature}", signature as lower-case hex
 * @throws \Exception with message 'url error' when $url cannot be parsed
 */
function getAuth($accessKey, $secretKey, $utcTimeStr, $url, $httpMethod)
{
    // parse_url() signals a seriously malformed URL by returning false
    // (never null), so test for "not an array" to actually catch it.
    $urlParseRet = parse_url($url);
    if (!is_array($urlParseRet)) {
        throw new \Exception('url error');
    }
    $host = isset($urlParseRet['host']) ? $urlParseRet['host'] : '';
    $path = isset($urlParseRet['path']) ? $urlParseRet['path'] : '';
    $version = "1";
    $expirationSeconds = "1800";
    $signatureHeaders = "host";
    // Prefix string: version, access key, timestamp and validity period.
    $authString = sprintf("bce-auth-v%s/%s/%s/%s", $version, $accessKey, $utcTimeStr, $expirationSeconds);
    // Derive the signing key from the secret key and the prefix string.
    $signingKey = hash_hmac('sha256', $authString, $secretKey);
    // Canonical URI path.
    $canonicalURI = getCanonicalURIPath($path);
    // Canonical query string (always empty here).
    $canonicalQueryString = '';
    // Canonical headers: only "host" is signed.
    $canonicalHeader = $signatureHeaders . ':' . $host;
    // Assemble the canonical request.
    $canonicalRequest = "$httpMethod\n$canonicalURI\n" . "$canonicalQueryString\n$canonicalHeader";
    // Sign the canonical request with the derived key.
    $signature = hash_hmac('sha256', $canonicalRequest, $signingKey);
    // Final authorization string.
    return "$authString/$signatureHeaders/$signature";
}
$accessKey = 'XXXXX'; // access key (AK) - replace with your own
$secretKey = 'XXXXX'; // secret key (SK) - replace with your own
$eqid = "8dc269c900038be6000000055fa9eb6f"; // eqid value taken from document.referrer
$url = "http://referer.bj.baidubce.com/v1/eqid/{$eqid}";
$httpMethod = 'GET';

// Timestamp for the request. It must be expressed in UTC.
$timestamp = new \DateTime();
$timestamp->setTimezone(new \DateTimeZone("UTC"));
$utcTimeStr = $timestamp->format("Y-m-d\TH:i:s\Z");

// Step 1: build the authorization string.
$auth = getAuth($accessKey, $secretKey, $utcTimeStr, $url, $httpMethod);

// Step 2: build the request headers.
// Note: cURL expects a list of "name: value" strings, not a key => value map.
// Accept-Encoding is deliberately NOT listed here: it is set through
// CURLOPT_ENCODING below so that cURL also decompresses the response.
$head = array(
    "host: referer.bj.baidubce.com", // optional
    "content-type: application/json",
    "authorization:{$auth}",
    "x-bce-date:{$utcTimeStr}",
    "accept: */*", // optional
);

// Step 3: send the request and read the response.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_HTTPHEADER, $head);
// Send the same verb that was signed.
curl_setopt($curl, CURLOPT_CUSTOMREQUEST, $httpMethod);
// Sends "Accept-Encoding: gzip, deflate" and transparently decodes the body;
// a hand-written header would leave a compressed body undecoded.
curl_setopt($curl, CURLOPT_ENCODING, 'gzip, deflate');
// Bound the wait so an unreachable endpoint cannot hang the page.
curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 5);
curl_setopt($curl, CURLOPT_TIMEOUT, 10);
curl_setopt($curl, CURLINFO_HEADER_OUT, 1);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$response = curl_exec($curl);
$request = curl_getinfo($curl, CURLINFO_HEADER_OUT);
$status = curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl);

// Uncomment for debugging:
// print("request: {$request}\n");
// print("status: {$status}\n");
// print("response: {$response}\n");

// Output the decoded keyword, or an empty string when the transfer failed,
// the status is not 200, or the body has no string "wd" field.
$keywords = '';
if ($response !== false && (int) $status === 200) {
    $arr = json_decode($response, true);
    if (is_array($arr) && isset($arr['wd']) && is_string($arr['wd'])) {
        $keywords = urldecode($arr['wd']);
    }
}
echo $keywords;
exit;
?>
需要注意的是:Referer API只能通过百度智能云服务器BCC(北京region)进行访问,其它服务器无法访问此接口。