Browse Source

introduce similarita' tag avanzata

si basa su uno script esterno che permette di decidere se due tag sono
simili, e quanto
root 4 years ago
parent
commit
2ed42d533f
1 changed files with 50 additions and 2 deletions
  1. 50 2
      src/Controller/RelatedRorController.php

+ 50 - 2
src/Controller/RelatedRorController.php

@@ -124,6 +124,28 @@ class RelatedRorController extends ControllerBase {
         return 1;
     }
 
+    /**
+     * Collects the tags that the similarity map lists as similar to a node's own tags.
+     *
+     * @param array $orig Node data; 'field_tags' is read as a list of items
+     *                    that each carry a 'target_id'.
+     * @return array Map of similar tag id => score, taken from $this->similarity.
+     *               Tags the node already has are left out. Empty when the
+     *               node has no tags or nothing similar is known.
+     */
+    private function getNodeSimilarTags(array $orig) : array {
+        // Only $orig exists in this method. The previous guard also inspected
+        // an undefined $other, which made it trigger on every single call.
+        if(!array_key_exists('field_tags', $orig) || count($orig['field_tags']) == 0) {
+            return [];
+        }
+        $orig_tags = array_map(function($t) { return $t['target_id']; }, $orig['field_tags']);
+        // Built locally and returned: both callers consume the return value.
+        $similar_orig_tags = [];
+        foreach($orig_tags as $tag) {
+            // ?? covers tags missing from the map (and an unset map) without raising a notice.
+            $candidates = $this->similarity[$tag] ?? NULL;
+            if(!is_array($candidates)) {
+                continue;
+            }
+            foreach($candidates as $similar => $score) {
+                // The same similar tag may be reached through several of the node's tags;
+                // that is harmless, and the first score encountered is the one kept.
+                if(!in_array($similar, $orig_tags) && !array_key_exists($similar, $similar_orig_tags)) {
+                    $similar_orig_tags[ $similar ] = $score;
+                }
+            }
+        }
+        return $similar_orig_tags;
+    }
+
+
     /**
      * Scores how related $other is to $orig from the tags they share, plus
      * the tags of $other that are listed as similar to those of $orig.
      *
      * @param array $orig  Data of the reference node ('field_tags' is read).
      * @param array $other Data of the candidate node ('field_tags' is read).
      * @return int Sum of the term weights of shared tags and of the
      *             score-scaled term weights of similar tags.
      */
     private function getSimilarity(array $orig, array $other): int {
         if(!array_key_exists('field_tags', $orig) || count($orig['field_tags']) == 0 || 
             !array_key_exists('field_tags', $other) || count($other['field_tags']) == 0) {
@@ -133,11 +155,24 @@ class RelatedRorController extends ControllerBase {
         $other_tags = array_map(function($t) { return $t['target_id']; }, $other['field_tags']);
         $both = array_intersect($orig_tags, $other_tags);
         $weighted = array_map([$this, 'getTermWeight'], $both);
-        return array_sum($weighted) || 0;
+
+        // Take every tag similar to the ones of $orig (loaded from a JSON file) and check which of them $other has.
+        $similar_orig_tags = $this->getNodeSimilarTags($orig);
+        $similar_both = array_intersect(array_keys($similar_orig_tags), $other_tags);
+        $similar_weighted = [];
+        foreach($similar_both as $x) {
+            // A similar term does not count as much as an identical one: scale its weight by the score from the JSON file.
+            $similar_weighted[] = $this->getTermWeight($x) * $similar_orig_tags[$x];
+        }
+
+        // array_sum() of an empty array is already 0, so no fallback is needed. The former
+        // "|| 0" made the expression a boolean, collapsing every score to 0 or 1.
+        // NOTE(review): a fractional total is truncated to fit the int return type;
+        // confirm whether rounding (or a float return) is wanted instead.
+        return (int) (array_sum($weighted) + array_sum($similar_weighted));
     }
 
     /**
      * Builds a JsonResponse for $data with HTTP status 200 and a
      * 'cache-control' header of the form 'public, max-age=<seconds>'.
      * This change raises max-age from 900 to 10800 seconds.
      *
      * @param array $data Payload handed to JsonResponse.
      * @return Response The response object that was built.
      */
     private function cachedJsonResp(array $data): Response {
-        $resp = new JsonResponse($data, 200, ['cache-control' => 'public, max-age=900']);
+        $resp = new JsonResponse($data, 200, ['cache-control' => 'public, max-age=10800']);
         return $resp;
     }
 
@@ -151,6 +186,18 @@ class RelatedRorController extends ControllerBase {
     public function relatedTopic() {
         // TODO: get node time
         // TODO: queryNearInTime($time)
+        $json_path = '../temp/similarity.json';
+        if(file_exists($json_path)) {
+            $this->similarity = json_decode(file_get_contents($json_path), TRUE);
+            if($this->similarity === null) {
+                \Drupal::logger('related_ror')->error('errore' . json_last_error()  .' nel leggere file json ' . realpath($json_path));
+                $this->similarity = [];
+            }
+        } else {
+            \Drupal::logger('related_ror')->info('file JSON non trovato in ' . realpath($json_path));
+            $this->similarity = [];
+        }
+
         $nid = \Drupal::request()->query->get('nid');
         if($nid == null) {
             return new Response("Must supply a NID", 400, array('Content-Type' => 'text/plain'));
@@ -172,6 +219,7 @@ class RelatedRorController extends ControllerBase {
         $goodnids = [];
         $scores = [];
         $other_e = \Drupal\node\Entity\Node::loadMultiple($this->queryNearInTime($time, $days=30, $limit=100));
+        $this->similar_orig_tags = $this->getNodeSimilarTags($orig_arr);
         foreach($other_e as $other_nid => $other_node) {
             if(intval($other_nid) === $nid) {
                 continue;