introduce similarita' tag avanzata

si basa su uno script esterno che permette di decidere se due tag sono
simili, e quanto
This commit is contained in:
root 2019-11-17 20:02:59 +01:00
parent ab49c7bdb1
commit 2ed42d533f

View file

@ -124,6 +124,28 @@ class RelatedRorController extends ControllerBase {
return 1;
}
/**
 * Collects the tags that are similar to the tags of a node, with their scores.
 *
 * Every tag referenced by $orig['field_tags'] is looked up in
 * $this->similarity; the similar tags listed there are gathered into one map,
 * leaving out tags the node already has.
 *
 * @param array $orig Node data; tags are read from $orig['field_tags'],
 *                    where each entry exposes a 'target_id'.
 *
 * @return array Map of similar tag id => score. Empty when the node has no
 *               tags or none of its tags has an entry in $this->similarity.
 */
private function getNodeSimilarTags(array $orig) : array {
    // Only $orig is inspected: this method has no second node to compare with.
    if (empty($orig['field_tags'])) {
        return [];
    }

    $orig_tags = array_map(function($t) { return $t['target_id']; }, $orig['field_tags']);

    // Accumulate locally and return the result; callers decide where to keep it.
    $similar_orig_tags = [];
    foreach ($orig_tags as $tag) {
        // Tags missing from the similarity data (or the data not being loaded
        // at all) simply contribute nothing.
        $similar_tags = $this->similarity[$tag] ?? NULL;
        if (!is_array($similar_tags)) {
            continue;
        }
        foreach ($similar_tags as $similar => $score) {
            // Loose comparison on purpose: presumably ids can show up as ints
            // (array keys) on one side and strings on the other — TODO confirm.
            if (in_array($similar, $orig_tags)) {
                continue;
            }
            // The same similar tag can be reached from several of the node's
            // tags; that is fine (it just means the tag really is similar).
            // The first score found is the one that is kept.
            if (!array_key_exists($similar, $similar_orig_tags)) {
                $similar_orig_tags[$similar] = $score;
            }
        }
    }
    return $similar_orig_tags;
}
private function getSimilarity(array $orig, array $other): int {
if(!array_key_exists('field_tags', $orig) || count($orig['field_tags']) == 0 ||
!array_key_exists('field_tags', $other) || count($other['field_tags']) == 0) {
@ -133,11 +155,24 @@ class RelatedRorController extends ControllerBase {
$other_tags = array_map(function($t) { return $t['target_id']; }, $other['field_tags']);
$both = array_intersect($orig_tags, $other_tags);
$weighted = array_map([$this, 'getTermWeight'], $both);
return array_sum($weighted) || 0;
// qui prendiamo tutti i tag simili a quelli dell'$orig (da un file json) e vedere se $other li ha
$similar_orig_tags = $this->getNodeSimilarTags($orig);
$similar_both = array_intersect(array_keys($similar_orig_tags), $other_tags);
$similar_weighted = [];
if(count($similar_both) > 0) {
foreach($similar_both as $x) {
// avere un termine simile non conta come avere un termine uguale; guardiamo cosa dice il file json
array_push($similar_weighted, $this->getTermWeight($x) * $similar_orig_tags[$x]);
}
}
return array_sum($weighted) + array_sum($similar_weighted) || 0;
}
/**
 * Builds a JSON response for $data that is publicly cacheable.
 *
 * The response has status 200 and a cache-control header of
 * "public, max-age=10800" (10800 seconds = 3 hours).
 *
 * @param array $data Payload handed to JsonResponse.
 *
 * @return Response The JSON response carrying the cache-control header.
 */
private function cachedJsonResp(array $data): Response {
    // A single construction: the earlier max-age=900 response was overwritten
    // before being returned, so it never had any effect.
    return new JsonResponse($data, 200, ['cache-control' => 'public, max-age=10800']);
}
@ -151,6 +186,18 @@ class RelatedRorController extends ControllerBase {
public function relatedTopic() {
// TODO: get node time
// TODO: queryNearInTime($time)
$json_path = '../temp/similarity.json';
if(file_exists($json_path)) {
$this->similarity = json_decode(file_get_contents($json_path), TRUE);
if($this->similarity === null) {
\Drupal::logger('related_ror')->error('errore' . json_last_error() .' nel leggere file json ' . realpath($json_path));
$this->similarity = [];
}
} else {
\Drupal::logger('related_ror')->info('file JSON non trovato in ' . realpath($json_path));
$this->similarity = [];
}
$nid = \Drupal::request()->query->get('nid');
if($nid == null) {
return new Response("Must supply a NID", 400, array('Content-Type' => 'text/plain'));
@ -172,6 +219,7 @@ class RelatedRorController extends ControllerBase {
$goodnids = [];
$scores = [];
$other_e = \Drupal\node\Entity\Node::loadMultiple($this->queryNearInTime($time, $days=30, $limit=100));
$this->similar_orig_tags = $this->getNodeSimilarTags($orig_arr);
foreach($other_e as $other_nid => $other_node) {
if(intval($other_nid) === $nid) {
continue;