本文实例讲述了PHP+MySQL+sphinx+scws实现全文检索功能。分享给大家供大家参考,具体如下:
我的个人环境是PHP7.1+MariaDB10.1.23
下载安装资源包
- sphinx地址
http://sphinxsearch.com/downloads/release/
- PHP的sphinx扩展下载
http://pecl.php.net/package/sphinx
- SCWS 下载地址
http://www.xunsearch.com/scws/download.php
- SCWS 词库下载地址
http://www.xunsearch.com/scws/down/scws-dict-chs-utf8.tar.bz2
安装过程
因为我的是PHP 7 版本,安装sphinx的时候遇到点问题
-
安装
sphinx
1
2
3
4
5
6
7
|
[root@MevHost sphinxb] # mkdir -p /usr/local/src/sphinx
[root@MevHost sphinxb] # cd /usr/local/src/sphinx
[root@MevHost sphinxb] # tar -xf sphinx-2.2.11-release.tar.gz
[root@MevHost sphinxb] # cd sphinx-2.2.11-release
# --prefix 指定安装目录,--with-mysql 指向 MySQL 的安装目录(我这里用的是 MariaDB 的安装目录)
[root@MevHost sphinxb] # ./configure --prefix=/usr/local/sphinx2 --with-mysql=/usr/local/mariadb/
[root@MevHost sphinxb] # make && make install
|
-
安装
sphinx
客户端
这个一定要先安装,不然后面为PHP安装sphinx扩展时会报错
1
2
3
|
[root@MevHost sphinxb] # cd /usr/local/src/sphinx/sphinx-2.2.11-release/api/libsphinxclient # 位于 sphinx-2.2.11-release 源码目录下
[root@MevHost sphinxb] # ./configure --prefix=/usr/local/sphinx2/libsphinxclient
[root@MevHost sphinxb] # make && make install
|
-
为PHP安装
sphinx
扩展
1
2
3
4
5
6
|
[root@MevHost sphinxb] # cd /usr/local/src/sphinx
[root@MevHost sphinxb] # tar zxvf sphinx-1.3.1.tgz
[root@MevHost sphinxb] # cd sphinx-1.3.1
[root@MevHost sphinxb] # phpize
[root@MevHost sphinxb] # ./configure --with-sphinx=/usr/local/sphinx2/libsphinxclient --with-php-config=/usr/local/php/bin/php-config
[root@MevHost sphinxb] # make && make install
|
成功后在 php.ini 中添加:
1
|
extension=sphinx.so
|
PHP7版本sphinx扩展下载(PECL 上的 sphinx-1.3.1 包是针对 PHP 5 编写的,在 PHP 7 下编译会失败;PHP 7 环境请改用下面 php7 分支的源码,编译步骤与上面相同)
下载地址
http://git.php.net/?p=pecl/search_engine/sphinx.git;a=shortlog;h=refs/heads/php7
- 安装scws
1
2
3
4
5
|
[root@MevHost sphinxb] # tar -jxvf scws-1.2.3.tar.bz2
[root@MevHost sphinxb] # mkdir /usr/local/scws
[root@MevHost sphinxb] # cd scws-1.2.3
[root@MevHost sphinxb] # ./configure --prefix=/usr/local/scws/
[root@MevHost sphinxb] # make && make install
|
- 为PHP安装scws扩展
1
2
3
4
|
[root@MevHost sphinxb] # cd /usr/local/src/sphinx/scws-1.2.3/phpext
[root@MevHost sphinxb] # phpize
[root@MevHost sphinxb] # ./configure --with-php-config=/usr/local/php/bin/php-config
[root@MevHost sphinxb] # make && make install
|
在php.ini 加入
1
2
3
|
extension = scws.so
scws.default.charset=utf-8
scws.default.fpath = /usr/local/scws/etc
|
- 安装scws词库
1
2
3
|
[root@MevHost sphinxb] # tar jxvf scws-dict-chs-utf8.tar.bz2 -C /usr/local/scws/etc/
#www为php-fpm运行用户
[root@MevHost sphinxb] # chown www:www /usr/local/scws/etc/dict.utf8.xdb
|
创建MySQL数据源
mtest.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
/*
Navicat MySQL Data Transfer
Source Database : mtest
Target Server Type : MYSQL
Target Server Version : 50505
File Encoding : 65001
Date : 2017-12-10 17:47:58
*/
SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for userinfo
-- ----------------------------
DROP TABLE IF EXISTS `userinfo`;
CREATE TABLE `userinfo` (
`id` int (11) unsigned NOT NULL AUTO_INCREMENT,
`userid` int (11) unsigned NOT NULL DEFAULT '0' ,
`addtime` datetime NOT NULL ,
`post` varchar (20) NOT NULL DEFAULT '' ,
`summary` text NOT NULL ,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=21 DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of userinfo
-- ----------------------------
INSERT INTO `userinfo` VALUES ( '17' , '1' , '2017-12-10 00:24:54' , '在CentOS7中使用Sendmail通' , 'sendmail' );
INSERT INTO `userinfo` VALUES ( '18' , '2' , '2017-12-10 10:24:54' , '彻底理解PHP的SESSION机制' , 'session' );
INSERT INTO `userinfo` VALUES ( '19' , '3' , '2017-12-10 12:24:54' , '手把手编写自己的PHPMVC框架实例教程' , 'mvc' );
INSERT INTO `userinfo` VALUES ( '20' , '4' , '2017-12-10 00:24:54' , 'php获取今日、昨日、上周、本月的起始时' , '时间' );
-- ----------------------------
-- Table structure for users
-- ----------------------------
DROP TABLE IF EXISTS `users`;
CREATE TABLE `users` (
`id` int (11) unsigned NOT NULL AUTO_INCREMENT,
`username` varchar (20) NOT NULL DEFAULT '' ,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8;
-- ----------------------------
-- Records of users
-- ----------------------------
INSERT INTO `users` VALUES ( '1' , 'Lionee' );
INSERT INTO `users` VALUES ( '2' , 'libber' );
INSERT INTO `users` VALUES ( '3' , 'sysmob' );
INSERT INTO `users` VALUES ( '4' , '学习' );
|
配置sphinx
配置文件在/usr/local/sphinx2/etc
1
|
cp sphinx-min.conf.dist sphinx.conf
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
# Data source "users": rows are pulled from the MySQL database `mtest`
# on 127.0.0.1:3306 using the sql_query below.
source users
{
type = mysql
sql_host = 127.0.0.1
# NOTE(review): sample credentials stored in plain text — replace them and
# restrict read access to this file on any real deployment.
sql_user = root
sql_pass = 123456
sql_db = mtest
sql_port = 3306 # optional, default is 3306
# Statements run on the connection before the main query, in this order.
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
# Main fetch query: joins userinfo (a) to users (b) on a.userid = b.id and
# converts addtime to a UNIX timestamp. a.id is the first selected column
# (Sphinx takes the first column as the document id — see the Sphinx docs).
sql_query = SELECT a. id , a.userid,b.username, UNIX_TIMESTAMP(a.addtime) AS addtime, a.post, a.summary FROM userinfo a left join users b on a.userid = b. id
# Column declarations: userid and addtime are attributes (usable for
# filtering/sorting, not full-text indexed); username and post are declared
# with sql_field_string. summary has no declaration here.
sql_attr_uint = userid
sql_field_string = username
sql_field_string = post
sql_attr_timestamp = addtime
sql_ranged_throttle = 0
# Leftovers from the sphinx-min.conf.dist template, kept disabled.
#sql_attr_uint = group_id
#sql_attr_timestamp = date_added
#sql_ranged_throttle = 0
}
# Second source declared with the "name : parent" form on top of "users";
# it only overrides sql_ranged_throttle. No index in this file references it.
source src1throttled : users
{
sql_ranged_throttle = 100
}
# Index "users": built from the source "users" and stored under
# /usr/local/sphinx2/var/data/users. This is the index name the PHP demo
# page passes to SphinxClient::Query().
index users
{
source = users
path = /usr/local/sphinx2/var/data/users
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
html_strip = 1
# charset_table is one logical value. It is too long for a single physical
# line, so the first line MUST end with a backslash; without it the second
# line is parsed as a separate (invalid) directive and indexer/searchd
# refuse to load the config.
charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z,A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6,U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101,U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109,U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F,U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117,U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D,U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135,U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C,U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144,U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B,U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153,U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159,U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161,U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167,U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F,U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175,U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C,U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F,U+05D0..U+05EA, U+0531..U+0556->U+0561..U+0586, U+0561..U+0587, U+0621..U+063A, U+01B9,U+01BF, U+0640..U+064A, U+0660..U+0669, U+066E, U+066F, U+0671..U+06D3, U+06F0..U+06FF,U+0904..U+0939, U+0958..U+095F, U+0960..U+0963, U+0966..U+096F, U+097B..U+097F,U+0985..U+09B9, U+09CE, U+09DC..U+09E3, U+09E6..U+09EF, U+0A05..U+0A39, U+0A59..U+0A5E,U+0A66..U+0A6F, U+0A85..U+0AB9, U+0AE0..U+0AE3, U+0AE6..U+0AEF, \
U+0B05..U+0B39,U+0B5C..U+0B61, U+0B66..U+0B6F, U+0B71, U+0B85..U+0BB9, U+0BE6..U+0BF2, U+0C05..U+0C39,U+0C66..U+0C6F, U+0C85..U+0CB9, U+0CDE..U+0CE3, U+0CE6..U+0CEF, U+0D05..U+0D39, U+0D60,U+0D61, U+0D66..U+0D6F, U+0D85..U+0DC6, U+1900..U+1938, U+1946..U+194F, U+A800..U+A805,U+A807..U+A822, U+0386->U+03B1, U+03AC->U+03B1, U+0388->U+03B5, U+03AD->U+03B5,U+0389->U+03B7, U+03AE->U+03B7, U+038A->U+03B9, U+0390->U+03B9, U+03AA->U+03B9,U+03AF->U+03B9, U+03CA->U+03B9, U+038C->U+03BF, U+03CC->U+03BF, U+038E->U+03C5,U+03AB->U+03C5, U+03B0->U+03C5, U+03CB->U+03C5, U+03CD->U+03C5, U+038F->U+03C9,U+03CE->U+03C9, U+03C2->U+03C3, U+0391..U+03A1->U+03B1..U+03C1,U+03A3..U+03A9->U+03C3..U+03C9, U+03B1..U+03C1, U+03C3..U+03C9, U+0E01..U+0E2E,U+0E30..U+0E3A, U+0E40..U+0E45, U+0E47, U+0E50..U+0E59, U+A000..U+A48F, U+4E00..U+9FBF,U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF, U+2F800..U+2FA1F, U+2E80..U+2EFF,U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF, U+3040..U+309F, U+30A0..U+30FF,U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF, U+3130..U+318F, U+A000..U+A48F,U+A490..U+A4CF
# Characters in the ranges listed in ngram_chars are indexed as n-grams of
# length ngram_len (1 = each such character on its own).
ngram_len = 1
ngram_chars = U+4E00..U+9FBF, U+3400..U+4DBF, U+20000..U+2A6DF, U+F900..U+FAFF,U+2F800..U+2FA1F, U+2E80..U+2EFF, U+2F00..U+2FDF, U+3100..U+312F, U+31A0..U+31BF,U+3040..U+309F, U+30A0..U+30FF,U+31F0..U+31FF, U+AC00..U+D7AF, U+1100..U+11FF,U+3130..U+318F, U+A000..U+A48F, U+A490..U+A4CF
}
# "common" section: present but empty in this sample configuration.
common
{
}
# Settings for the indexer tool, the command that (re)builds the indexes.
indexer
{
# NOTE(review): presumably the memory cap used while indexing — confirm
# against the Sphinx documentation before tuning.
mem_limit = 128M
}
# Settings for searchd, the background service that answers search queries.
searchd
{
# Native API listener; the PHP demo page connects here via
# SetServer('127.0.0.1', 9312).
listen = 9312
# MySQL-protocol (mysql41) listener.
listen = 9306:mysql41
# File paths must not contain a space before the extension; a value such as
# "searchd .log" points at a different (wrong) file name.
log = /usr/local/sphinx2/var/log/searchd.log
query_log = /usr/local/sphinx2/var/log/query.log
query_log_format = sphinxql
read_timeout = 5
client_timeout = 300
max_children = 30
persistent_connections_limit = 30
pid_file = /usr/local/sphinx2/var/log/searchd.pid
seamless_rotate = 1
preopen_indexes = 1
unlink_old = 1
mva_updates_pool = 1M
max_packet_size = 8M
max_filters = 256
max_filter_values = 4096
max_batch_queries = 32
workers = threads # for RT to work
}
|
启动sphinx
1
2
3
|
[root@MevHost ~] # pkill searchd
[root@MevHost ~] # /usr/local/sphinx2/bin/indexer --config /usr/local/sphinx2/etc/sphinx.conf --all
[root@MevHost ~] # /usr/local/sphinx2/bin/searchd --config /usr/local/sphinx2/etc/sphinx.conf
|
如果出现下面的报错
"Oops! It seems that sphinx was built with wrong endianess (cross-compiling?)
either reconfigure and rebuild, defining ac_cv_c_bigendian=no in the environment of
./configure script,
either ensure that '#define USE_LITTLE_ENDIAN = 1' in config/config.h"
我是直接修改了sphinx源码目录下的 config/config.h,把其中的 USE_LITTLE_ENDIAN 定义改为 #define USE_LITTLE_ENDIAN 1(注意:报错提示里写的是 "= 1",但 C 的宏定义不能带等号,否则 #if 判断会编译出错),之后重新执行 make && make install。
接下来的这段是我们的PHP代码了
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
<?php
/**
 * test.php — demo search page.
 *
 * Shows a one-field form. When a non-empty query "q" is POSTed, the text is
 * segmented with SCWS, the resulting words are sent to searchd (Sphinx API
 * port 9312) against the index "users", and the raw result array plus the
 * elapsed time are dumped below the form.
 *
 * Requires the scws and sphinx PHP extensions and a running searchd.
 */

// phpinfo();die;
// Demo setting: show every error in the page. Turn this off in production.
ini_set('display_errors', '1');
error_reporting(E_ALL);
// Headers must be sent before any markup, otherwise PHP emits
// "headers already sent" and the header is dropped.
header("Content-type: text/html; charset=utf-8");

// Escapes a value for safe inclusion in HTML (prevents XSS from user input).
$esc = function ($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8');
};

// Accept only a scalar string; a missing field or an array (q[]=...) is
// treated as "nothing submitted" instead of raising notices.
$key = (isset($_POST['q']) && is_string($_POST['q'])) ? trim($_POST['q']) : '';
$submitted = ($key !== '');

$words = '';
$res   = null;
$error = '';
$time  = 0.0;

if ($submitted) {
    $b_time = microtime(true);
    $index  = "users";

    //======================================== word segmentation
    $so = scws_new();
    $so->set_charset('utf-8');
    // Default dictionary
    $so->add_dict(ini_get('scws.default.fpath') . '/dict.utf8.xdb');
    // Custom dictionary
    // $so->add_dict('./dd.txt',SCWS_XDICT_TXT);
    // Default rule set
    $so->set_rule(ini_get('scws.default.fpath') . '/rules.utf8.ini');
    // Drop punctuation and similar symbols from the segmentation result
    $so->set_ignore(true);
    // Compound splitting (e.g. a three-character word also returned as its
    // shorter parts). The mode is a bitwise OR of 1 | 2 | 4 | 8, meaning
    // short words | duality | main single chars | all single chars, i.e. the
    // constants SCWS_MULTI_SHORT, SCWS_MULTI_DUALITY, SCWS_MULTI_ZMAIN and
    // SCWS_MULTI_ZALL. Disabled here.
    $so->set_multi(false);
    // Do not merge loose single characters into two-character tokens
    $so->set_duality(false);
    // Text to segment
    $so->send_text($key);

    // get_result() hands back the words in batches and returns false when
    // there is nothing left, so drain it in a loop rather than iterating a
    // single (possibly false) return value.
    $parts = array();
    while ($batch = $so->get_result()) {
        foreach ($batch as $item) {
            $parts[] = '(' . $item['word'] . ')';
        }
    }
    $so->close();

    // Add the whole phrase as well
    # array_unshift($parts, '(' . $key . ')');
    $words = implode('|', $parts);

    //======================================== search
    $sc = new SphinxClient();
    $sc->SetServer('127.0.0.1', 9312);
    # $sc->SetMatchMode(SPH_MATCH_ALL);
    $sc->SetMatchMode(SPH_MATCH_ANY);
    $sc->SetArrayResult(true);
    $res = $sc->Query($words, $index);
    if ($res === false) {
        // Query() returns false on failure; keep the reason for display.
        $error = $sc->GetLastError();
    }

    $time = microtime(true) - $b_time;
}
?>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Document</title>
</head>
<body>
<form method="post" action="test.php">
<p>输入:</p> <input type="text" name="q" autocomplete="off">
</form>
<?php if ($submitted): ?>
<p>输入:<?php echo $esc($key); ?></p>
<p>分词:<?php echo $esc($words); ?></p>
<hr>
<?php if ($res === false): ?>
<p>查询失败:<?php echo $esc($error); ?></p>
<?php endif; ?>
<pre><?php
    // print_r() output contains indexed text from the database; escape it too.
    echo $esc(print_r($res, true));
    echo "\r\n";
    echo $time;
?></pre>
<?php endif; ?>
</body>
</html>
|
sphinx 配置文件解析
- source:数据源,数据是从什么地方来的。
- index:索引,当有数据源之后,从数据源处构建索引。索引实际上就是相当于一个字典检索。有了整本字典内容以后,才会有字典检索。
- searchd:提供搜索查询服务。它一般是以daemon(守护进程)的形式运行在后台的。
- indexer:构建索引的服务。当要重新构建索引的时候,就是调用indexer这个命令。
- attr:属性,属性是存在索引中的,它不进行全文索引,但是可以用于过滤和排序。
sphinx.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|