RelatedRorController.php 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. <?php
  2. namespace Drupal\related_ror\Controller;
  3. use Drupal\Core\Controller\ControllerBase;
  4. use Symfony\Component\HttpFoundation\Response;
  5. use Symfony\Component\HttpFoundation\JsonResponse;
  6. use Drupal\Core\Cache\CacheableJsonResponse;
  7. use Drupal\Core\Cache\CacheableMetadata;
  8. use Symfony\Component\DependencyInjection\ContainerInterface;
  9. use Drupal\Core\Template\TwigEnvironment;
  10. use Drupal\node\NodeInterface;
  /**
   * Endpoints returning, as JSON, the nodes related to a given node.
   *
   * Two notions of "related" are implemented here:
   *  - relatedTime(): nodes whose field_tx_date is close to the creation time
   *    of the requested node;
   *  - relatedTopic(): nodes, taken from the same kind of time window, that
   *    share equal or similar tags (field_tags) with the requested node.
   *
   * The requested node is always identified by the `nid` query parameter.
   */
  class RelatedRorController extends ControllerBase {

    /**
     * The 'state' service injected by create(); not read by any method below.
     */
    protected $state;

    /**
     * The 'twig' service injected by create(); not read by any method below.
     *
     * @var \Drupal\Core\Template\TwigEnvironment
     */
    protected $twig;

    /**
     * Tag similarity map decoded from the JSON file by relatedTopic().
     *
     * Used as: tag id => [similar tag id => score].
     *
     * @var array
     */
    protected $similarity = [];

    /**
     * Per-request memo of getTermWeight() results, keyed by term id.
     *
     * @var int[]
     */
    private $termWeights = [];

    /**
     * @param mixed $state
     *   The 'state' service.
     * @param \Drupal\Core\Template\TwigEnvironment $twig
     *   The 'twig' service.
     */
    public function __construct($state, TwigEnvironment $twig) {
      $this->state = $state;
      $this->twig = $twig;
    }

    /**
     * Builds the controller from the service container.
     */
    public static function create(ContainerInterface $container) {
      return new static(
        $container->get('state'),
        $container->get('twig')
      );
    }

    /**
     * Converts a node entity into the small array exposed in JSON responses.
     *
     * @param object $node_entity
     *   An entity exposing toArray() (callers pass loaded nodes).
     *
     * @return array
     *   Keys: 'nid' (int), 'title', 'summary' (plain-text body, truncated to
     *   3500 characters and XML-escaped) and 'url' (path alias, NULL if the
     *   node array carries none).
     */
    private function nodeToLinkdata($node_entity) {
      $arr = $node_entity->toArray();

      // Some people publish articles without a body (and some bundles may not
      // have the field at all), so fall back to an empty string.
      $body = (string) ($arr['body'][0]['value'] ?? '');
      $plain = html_entity_decode(strip_tags($body));

      // Truncate by characters, not bytes: cutting a multi-byte UTF-8 sequence
      // in half would make htmlspecialchars() return an empty string.
      // ENT_SUBSTITUTE additionally protects against already-invalid input.
      $summary = htmlspecialchars(
        mb_substr($plain, 0, 3500, 'UTF-8'),
        ENT_XML1 | ENT_SUBSTITUTE,
        'UTF-8'
      );

      return [
        'nid' => intval($arr['nid'][0]['value']),
        'title' => $arr['title'][0]['value'],
        'summary' => $summary,
        'url' => $arr['path'][0]['alias'] ?? NULL,
      ];
    }

    /**
     * Finds published nodes whose field_tx_date is within $days of $time.
     *
     * Only nodes of type ror_news, redazionali or news_trasmissioni are
     * considered. Results are ordered by increasing distance from $time.
     *
     * @param int $time
     *   Reference Unix timestamp.
     * @param int $days
     *   Half-width of the time window, in days.
     * @param int $limit
     *   Maximum number of rows to fetch; 0 (or less) means no limit.
     *
     * @return int[]
     *   Node ids, nearest first, without duplicates. Empty if the query could
     *   not be executed.
     */
    private function queryNearInTime(int $time, int $days = 7, int $limit = 0): array {
      $types = ['ror_news', 'redazionali', 'news_trasmissioni'];
      $window = 3600 * 24 * $days;
      // Constant SQL fragment (no user input), shared by every expression.
      $ts_expr = 'UNIX_TIMESTAMP(dt.field_tx_date_value)';

      $query = \Drupal::database()->select('node', 'n');
      $query->addJoin('INNER', 'node__field_tx_date', 'dt', 'n.nid=dt.entity_id');
      $query->addJoin('INNER', 'node_field_data', 'field', 'n.nid=field.nid');
      $query->addField('n', 'nid');
      $query->addExpression("CAST($ts_expr as UNSIGNED)", 'ts');
      $query->addExpression("ABS(CAST($ts_expr as SIGNED) - :time)", 'dist', [':time' => $time]);
      $query->condition('field.status', '1');
      $query->condition('n.type', $types, 'IN');
      $query->where("CAST($ts_expr as UNSIGNED) > :from", [':from' => $time - $window]);
      $query->where("CAST($ts_expr as UNSIGNED) < :to", [':to' => $time + $window]);
      $query->orderBy('dist', 'ASC');
      if ($limit > 0) {
        $query->range(0, $limit);
      }

      // execute() yields NULL when the query fails its pre-execution step;
      // the declared return type is array, so report "nothing found" instead.
      $result = $query->execute();
      if (!$result) {
        return [];
      }

      // The joins can emit the same nid more than once; array_unique() keeps
      // the first (nearest) occurrence, so the ordering is preserved.
      $nids = array_map('intval', $result->fetchCol(0));
      return array_values(array_unique($nids));
    }

    /**
     * Loads the given nodes and converts each with nodeToLinkdata().
     *
     * @param array $nids
     *   Node ids, in the order they should be presented.
     *
     * @return array
     *   ['nodes' => list of nodeToLinkdata() arrays].
     */
    private function presentNids(array $nids) {
      $data = ['nodes' => []];
      foreach (\Drupal\node\Entity\Node::loadMultiple($nids) as $node) {
        $data['nodes'][] = $this->nodeToLinkdata($node);
      }
      return $data;
    }

    /**
     * Resolves the `nid` query parameter into a loaded node.
     *
     * @return \Drupal\node\Entity\Node|\Symfony\Component\HttpFoundation\Response
     *   The node, or the plain-text error response to send back (400 when the
     *   parameter is missing or not made of digits, 404 when no node exists).
     */
    private function requestedNode() {
      $plain = ['Content-Type' => 'text/plain'];
      $nid = \Drupal::request()->query->get('nid');

      if ($nid === NULL || $nid === '') {
        return new Response("Must supply a NID", 400, $plain);
      }
      // Digits only: is_numeric() would also let "1.5" or "1e3" through.
      if (!is_scalar($nid) || preg_match('/^[0-9]+\z/', (string) $nid) !== 1) {
        $shown = is_scalar($nid) ? $nid : gettype($nid);
        return new Response("NID must be integer, not `$shown`", 400, $plain);
      }

      $node = \Drupal\node\Entity\Node::load(intval($nid));
      if ($node === NULL) {
        return new Response("Node not found", 404, $plain);
      }
      return $node;
    }

    /**
     * Lists nodes whose field_tx_date is within 4 days of the node's creation.
     *
     * @return \Symfony\Component\HttpFoundation\Response
     *   JSON {"time": {"nodes": [...]}} with at most 20 candidates (the
     *   requested node itself is removed), or a plain-text error response.
     */
    public function relatedTime() {
      $node = $this->requestedNode();
      if ($node instanceof Response) {
        return $node;
      }
      if ($this->isFilteredOut($node)) {
        return new Response("Node has no similarity", 404, ['Content-Type' => 'text/plain']);
      }

      $nid = intval($node->id());
      $time = intval($node->getCreatedTime());
      $near_nids = array_filter(
        $this->queryNearInTime($time, 4, 20),
        function ($n) use ($nid): bool {
          return intval($n) !== $nid;
        }
      );

      return $this->cachedJsonResp(['time' => $this->presentNids($near_nids)]);
    }

    /**
     * Placeholder endpoint: not implemented yet.
     *
     * NOTE(review): apart from the missing-nid check this always answers
     * HTTP 500 with a debug body; behaviour is kept as-is until the real
     * implementation lands.
     */
    public function relatedTrx() {
      // TODO: get node time
      // TODO: queryNearInTime($time)
      $nid = \Drupal::request()->query->get('nid');
      if ($nid == NULL) {
        return new Response("Must supply a NID", 400, ['Content-Type' => 'text/plain']);
      }
      return new Response("asd $nid", 500, ['Content-Type' => 'text/plain']);
    }

    /**
     * Weighs a term by how often it is used.
     *
     * Terms referenced by fewer than 100 rows of taxonomy_index weigh 2,
     * every other term weighs 1. Results are memoized because the same term
     * is looked up once per candidate node.
     */
    private function getTermWeight(int $termid): int {
      if (!isset($this->termWeights[$termid])) {
        $query = \Drupal::database()->select('taxonomy_index', 'ti');
        $query->fields('ti', ['nid']);
        $query->condition('ti.tid', $termid);
        $count = intval($query->countQuery()->execute()->fetchField());
        $this->termWeights[$termid] = $count < 100 ? 2 : 1;
      }
      return $this->termWeights[$termid];
    }

    /**
     * Extracts the field_tags target ids from a node's toArray() output.
     *
     * @return int[]
     *   Tag ids in field order; empty when the field is absent or empty.
     */
    private function tagIds(array $node_arr): array {
      if (empty($node_arr['field_tags']) || !is_array($node_arr['field_tags'])) {
        return [];
      }
      $ids = [];
      foreach ($node_arr['field_tags'] as $item) {
        if (isset($item['target_id'])) {
          $ids[] = intval($item['target_id']);
        }
      }
      return $ids;
    }

    /**
     * Collects the tags that the similarity map declares similar to $orig's.
     *
     * Tags the node already has are excluded.
     *
     * @param array $orig
     *   toArray() output of the reference node.
     *
     * @return array
     *   Similar tag id => score, as read from $this->similarity.
     */
    private function getNodeSimilarTags(array $orig): array {
      $orig_tags = $this->tagIds($orig);
      if (count($orig_tags) === 0) {
        return [];
      }

      $own = array_flip($orig_tags);
      $similar_tags = [];
      foreach ($orig_tags as $tag) {
        if (!isset($this->similarity[$tag]) || !is_array($this->similarity[$tag])) {
          continue;
        }
        foreach ($this->similarity[$tag] as $similar => $score) {
          if (isset($own[$similar]) || !is_numeric($score)) {
            continue;
          }
          // The same similar tag can show up under several of the node's
          // tags; that is not a problem (it only means the tag really is
          // similar). The first score encountered is the one kept.
          if (!array_key_exists($similar, $similar_tags)) {
            $similar_tags[$similar] = $score;
          }
        }
      }
      return $similar_tags;
    }

    /**
     * Scores how similar $other is to $orig, based on their tags.
     *
     * @param array $orig
     *   toArray() output of the reference node.
     * @param array $other
     *   toArray() output of the candidate node.
     * @param array $similar_orig_tags
     *   Result of getNodeSimilarTags($orig), computed once by the caller.
     *
     * @return float
     *   Sum of the weights of the shared tags plus, for each tag of $other
     *   that is only similar to one of $orig's, weight * similarity score.
     *   0 when either node has no tags.
     */
    private function getSimilarity(array $orig, array $other, array $similar_orig_tags): float {
      $orig_tags = $this->tagIds($orig);
      $other_tags = $this->tagIds($other);
      if (count($orig_tags) === 0 || count($other_tags) === 0) {
        return 0.0;
      }

      $score = 0.0;
      foreach (array_intersect($orig_tags, $other_tags) as $tid) {
        $score += $this->getTermWeight($tid);
      }

      // Take every tag similar to $orig's (from the JSON file) and check
      // whether $other has it. Having a similar term does not count as much
      // as having the same term: scale by the score the JSON file gives.
      foreach (array_intersect(array_keys($similar_orig_tags), $other_tags) as $tid) {
        $score += $this->getTermWeight($tid) * $similar_orig_tags[$tid];
      }

      // Return the numeric sum itself ("sum || 0" would yield a boolean).
      return $score;
    }

    /**
     * Wraps $data in a JSON response publicly cacheable for 10800 seconds.
     */
    private function cachedJsonResp(array $data): Response {
      return new JsonResponse($data, 200, ['cache-control' => 'public, max-age=10800']);
    }

    /**
     * Tells whether related content must not be computed for this node.
     *
     * @return bool
     *   TRUE for nodes of type 'page'.
     */
    private function isFilteredOut(\Drupal\node\Entity\Node $node) {
      return $node->getType() === 'page';
    }

    /**
     * Reads the tag similarity map from ../temp/similarity.json.
     *
     * @return array
     *   The decoded map, or an empty array (after logging) when the file is
     *   missing, unreadable or does not decode to an array.
     */
    private function loadSimilarity(): array {
      $json_path = '../temp/similarity.json';

      if (!file_exists($json_path)) {
        // realpath() is FALSE for a missing file, so log the raw path.
        \Drupal::logger('related_ror')->info('file JSON non trovato in ' . $json_path);
        return [];
      }

      $raw = file_get_contents($json_path);
      $decoded = is_string($raw) ? json_decode($raw, TRUE) : NULL;
      if (!is_array($decoded)) {
        \Drupal::logger('related_ror')->error('errore' . json_last_error() . ' nel leggere file json ' . realpath($json_path));
        return [];
      }
      return $decoded;
    }

    /**
     * Lists nodes that share equal or similar tags with the requested node.
     *
     * Candidates are the (at most 100) nodes whose field_tx_date is within 30
     * days of the node's creation time; those with a positive similarity
     * score are returned, best score first.
     *
     * @return \Symfony\Component\HttpFoundation\Response
     *   JSON {"topic": {"nodes": [...]}}, or a plain-text error response.
     */
    public function relatedTopic() {
      $node = $this->requestedNode();
      if ($node instanceof Response) {
        return $node;
      }
      if ($this->isFilteredOut($node)) {
        return new Response("Node similarity disallowed", 403, ['Content-Type' => 'text/plain']);
      }

      $this->similarity = $this->loadSimilarity();

      $nid = intval($node->id());
      $orig_arr = $node->toArray();
      $time = intval($node->getCreatedTime());

      // Depends on the requested node only, so compute it once up front.
      $similar_orig_tags = $this->getNodeSimilarTags($orig_arr);

      $candidates = \Drupal\node\Entity\Node::loadMultiple($this->queryNearInTime($time, 30, 100));
      $scores = [];
      foreach ($candidates as $other_nid => $other_node) {
        if (intval($other_nid) === $nid) {
          continue;
        }
        $score = $this->getSimilarity($orig_arr, $other_node->toArray(), $similar_orig_tags);
        if ($score > 0) {
          $scores[$other_nid] = $score;
        }
      }

      // Highest score first; the keys are the node ids.
      arsort($scores);

      return $this->cachedJsonResp(['topic' => $this->presentNids(array_keys($scores))]);
    }

  }