1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
<?php
/**
* Copyright © Magento, Inc. All rights reserved.
* See COPYING.txt for license details.
*/
declare(strict_types=1);
namespace Magento\Search\Model;
use Magento\Search\Api\SynonymAnalyzerInterface;
/**
* SynonymAnalyzer is responsible for finding the synonyms that match a word or a phrase.
*/
class SynonymAnalyzer implements SynonymAnalyzerInterface
{
    /**
     * Reader used to load the synonym groups related to a search phrase.
     *
     * @var SynonymReader $synReaderModel
     */
    protected $synReaderModel;

    /**
     * Constructor
     *
     * @param SynonymReader $synReader
     */
    public function __construct(SynonymReader $synReader)
    {
        $this->synReaderModel = $synReader;
    }

    /**
     * Returns an array of arrays consisting of the synonyms found for each word in the input phrase
     *
     * The phrase is split on spaces. For every word the synonym groups loaded for the phrase are searched
     * for an entry equal to that word, or to a run of words starting at it. When a group matches, the whole
     * group (split on commas) is returned for the word; otherwise the word itself is returned unchanged.
     *
     * For phrase: "Elizabeth is the English queen" the output is an array of arrays containing synonyms for
     * each word in the phrase, e.g. with the groups "british,english" and "queen,monarch":
     *
     * [
     *     0 => [ 0 => "Elizabeth" ],
     *     1 => [ 0 => "is" ],
     *     2 => [ 0 => "the" ],
     *     3 => [ 0 => "british", 1 => "english" ],
     *     4 => [ 0 => "queen", 1 => "monarch" ]
     * ]
     *
     * A phrase that is empty or contains only whitespace yields an empty array.
     *
     * @param string $phrase
     * @return array
     * @throws \Magento\Framework\Exception\LocalizedException
     */
    public function getSynonymsForPhrase($phrase)
    {
        // Compare with '' explicitly: empty() would also reject the legitimate search term "0".
        if (trim($phrase) === '') {
            return [];
        }

        $synonymGroups = $this->getSynonymGroupsByPhrase($phrase);

        // Split on runs of spaces. PREG_SPLIT_NO_EMPTY drops the empty tokens that leading or trailing
        // spaces would otherwise produce, so no empty "word" is ever matched against the groups.
        $words = preg_split('/ +/', $phrase, -1, PREG_SPLIT_NO_EMPTY);

        $result = [];
        foreach ($words as $offset => $word) {
            // Default: the word stands for itself when no synonym group matches.
            $synonyms = [$word];

            if ($synonymGroups) {
                // Look for the longest-to-shortest run of words that starts at the current word.
                $pattern = $this->getSearchPattern(\array_slice($words, $offset));
                $position = $this->findInArray($pattern, $synonymGroups);
                if ($position !== null) {
                    $synonyms = explode(',', $synonymGroups[$position]);
                }
            }

            $result[] = $synonyms;
        }

        return $result;
    }

    /**
     * Helper method to find the first entry of $synonymGroupsToExamine that matches $pattern.
     *
     * If an entry matches, its array index is returned. Otherwise null is returned.
     *
     * @param string $pattern regular expression, including delimiters and modifiers
     * @param array $synonymGroupsToExamine list of comma-separated synonym groups
     * @return int|null
     */
    private function findInArray(string $pattern, array $synonymGroupsToExamine)
    {
        foreach ($synonymGroupsToExamine as $position => $synonymGroup) {
            // preg_match() returns 1 on a match, 0 on no match and false on error; only 1 counts.
            if (preg_match($pattern, $synonymGroup) === 1) {
                return $position;
            }
        }

        return null;
    }

    /**
     * Returns a regular expression to search for synonyms of the phrase represented as the list of words.
     *
     * Returned pattern contains expression to search for a part of the phrase from the beginning.
     *
     * For example, in the phrase "Elizabeth is the English queen" with subset from the very first word,
     * the method will build an expression which looks for synonyms for all these patterns:
     * - Elizabeth is the English queen
     * - Elizabeth is the English
     * - Elizabeth is the
     * - Elizabeth is
     * - Elizabeth
     *
     * For the same phrase on the second iteration with the first word "is" it will match for these synonyms:
     * - is the English queen
     * - is the English
     * - is the
     * - is
     *
     * The pattern looks for an exact match and will not find these phrases as synonyms:
     * - Is there anybody in the room?
     * - Is the English is most popular language?
     * - Is the English queen Elizabeth?
     *
     * Take into account that returned pattern expects that data will be represented as comma-separated value.
     * Every word is escaped with preg_quote(), so characters that are special in a regular expression
     * (including the "/" delimiter) are matched literally.
     *
     * @param array $words
     * @return string
     */
    private function getSearchPattern(array $words): string
    {
        // Escape once, before the loop: quoting inside the loop would escape the same words repeatedly.
        $quotedWords = array_map(
            static function ($word) {
                return preg_quote($word, '/');
            },
            $words
        );

        $patterns = [];
        for ($lastItem = count($quotedWords); $lastItem > 0; $lastItem--) {
            $phrase = implode('\s+', \array_slice($quotedWords, 0, $lastItem));
            // The phrase must be a complete item of the comma-separated group: first, middle or last.
            $patterns[] = '^' . $phrase . ',';
            $patterns[] = ',' . $phrase . ',';
            $patterns[] = ',' . $phrase . '$';
        }

        return '/' . implode('|', $patterns) . '/i';
    }

    /**
     * Get all synonym groups for the phrase
     *
     * Returns an array of synonyms which are represented as comma-separated value for each item in the list
     *
     * @param string $phrase
     * @return string[]
     * @throws \Magento\Framework\Exception\LocalizedException
     */
    private function getSynonymGroupsByPhrase(string $phrase): array
    {
        $result = [];

        /** @var array $synonymGroups */
        $synonymGroups = $this->synReaderModel->loadByPhrase($phrase)->getData();
        foreach ($synonymGroups as $row) {
            // Keep only the comma-separated synonyms string of each loaded row.
            $result[] = $row['synonyms'];
        }

        return $result;
    }
}