123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 |
- <?php
- namespace Drupal\related_ror\Controller;
- use Drupal\Core\Controller\ControllerBase;
- use Symfony\Component\HttpFoundation\Response;
- use Symfony\Component\HttpFoundation\JsonResponse;
- use Drupal\Core\Cache\CacheableJsonResponse;
- use Drupal\Core\Cache\CacheableMetadata;
- use Symfony\Component\DependencyInjection\ContainerInterface;
- use Drupal\Core\Template\TwigEnvironment;
- use Drupal\node\NodeInterface;
/**
 * JSON endpoints that suggest nodes related to a given node.
 *
 * Two strategies are implemented: nodes whose field_tx_date is close to the
 * reference node's creation time (relatedTime), and nodes in a wider time
 * window ranked by the taxonomy tags they share with the reference node
 * (relatedTopic). relatedTrx is an unimplemented stub.
 */
class RelatedRorController extends ControllerBase {

  /**
   * The 'state' service handed over by create(); not read by this class.
   */
  protected $state;

  /**
   * The Twig environment handed over by create(); not read by this class.
   *
   * @var \Drupal\Core\Template\TwigEnvironment
   */
  protected $twig;

  /**
   * Tag similarity map decoded from similarity.json.
   *
   * Shape used by the code: tag id => [similar tag id => score].
   *
   * @var array
   */
  protected $similarity = [];

  /**
   * Per-request memo of getTermWeight() results, keyed by term id.
   *
   * @var int[]
   */
  protected $termWeights = [];

  /**
   * Stores the injected services.
   *
   * @param mixed $state
   *   Whatever the container returns for the 'state' service.
   * @param \Drupal\Core\Template\TwigEnvironment $twig
   *   The Twig environment.
   */
  public function __construct($state, TwigEnvironment $twig) {
    $this->state = $state;
    $this->twig = $twig;
  }

  /**
   * Builds the controller from the service container.
   */
  public static function create(ContainerInterface $container) {
    return new static(
      $container->get('state'),
      $container->get('twig')
    );
  }

  /**
   * Flattens a node into the small link record returned to clients.
   *
   * @param object $node_entity
   *   An entity exposing toArray() (a node in every call made by this class).
   *
   * @return array
   *   Keys: 'nid' (int), 'title', 'summary' (XML-escaped plain text, at most
   *   3500 characters) and 'url' (the path alias, NULL when none is present).
   */
  private function nodeToLinkdata($node_entity) {
    $arr = $node_entity->toArray();
    // Some articles are published without a body, and a bundle may not have a
    // body field at all: both cases yield an empty summary.
    $body = (string) ($arr['body'][0]['value'] ?? '');
    $plain = html_entity_decode(strip_tags($body));
    // Truncate by characters, not bytes: cutting a multi-byte sequence in half
    // would make htmlspecialchars() reject the whole string.
    $summary = htmlspecialchars(mb_substr($plain, 0, 3500, 'UTF-8'), ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');
    return [
      'nid' => intval($arr['nid'][0]['value']),
      'title' => $arr['title'][0]['value'],
      'summary' => $summary,
      'url' => $arr['path'][0]['alias'] ?? NULL,
    ];
  }

  /**
   * Finds published nodes whose field_tx_date lies close to a timestamp.
   *
   * @param int $time
   *   Reference Unix timestamp.
   * @param int $days
   *   Half-width of the search window, in days.
   * @param int $limit
   *   Maximum number of rows; 0 means no limit.
   *
   * @return array
   *   Node ids ordered by increasing distance from $time; empty when the
   *   query could not be prepared.
   */
  private function queryNearInTime(int $time, int $days = 7, int $limit = 0): array {
    $bundles = ['ror_news', 'redazionali', 'news_trasmissioni'];
    $window = 3600 * 24 * $days;

    $query = \Drupal::database()->select('node', 'n');
    $query->addJoin('INNER', 'node__field_tx_date', 'dt', 'n.nid=dt.entity_id');
    $query->addJoin('INNER', 'node_field_data', 'field', 'n.nid=field.nid');
    $query->addField('n', 'nid');
    $query->addExpression("CAST(UNIX_TIMESTAMP(field_tx_date_value) as UNSIGNED)", 'ts');
    // $time is declared int, so interpolating it cannot inject SQL.
    $query->addExpression("ABS(cast(UNIX_TIMESTAMP(field_tx_date_value) as signed) - $time)", 'dist');
    $query->condition('field.status', '1');
    $query->condition('n.type', $bundles, 'IN');
    $query->where('CAST(UNIX_TIMESTAMP(dt.field_tx_date_value) as UNSIGNED) > :from', [':from' => $time - $window]);
    $query->where('CAST(UNIX_TIMESTAMP(dt.field_tx_date_value) as UNSIGNED) < :to', [':to' => $time + $window]);
    $query->orderBy('dist', 'ASC');
    if ($limit > 0) {
      $query->range(0, $limit);
    }
    // The declared return type is array, so a failed preparation must yield an
    // empty list rather than NULL.
    if ($query->preExecute() !== TRUE) {
      return [];
    }
    $result = $query->execute();
    return $result ? $result->fetchCol(0) : [];
  }

  /**
   * Loads the given nodes and converts each one to a link record.
   *
   * @param array $nids
   *   Node ids. NOTE(review): the result order is whatever loadMultiple()
   *   yields, presumably the order of $nids - confirm.
   *
   * @return array
   *   ['nodes' => list of nodeToLinkdata() records].
   */
  private function presentNids(array $nids) {
    $data = ['nodes' => []];
    foreach (\Drupal\node\Entity\Node::loadMultiple($nids) as $node) {
      $data['nodes'][] = $this->nodeToLinkdata($node);
    }
    return $data;
  }

  /**
   * Resolves the 'nid' query parameter of the current request to a node.
   *
   * Shared by relatedTime() and relatedTopic() so both validate identically.
   *
   * @return \Drupal\node\Entity\Node|\Symfony\Component\HttpFoundation\Response
   *   The loaded node, or a plain-text error response (400 for a missing or
   *   non-integer nid, 404 when no such node exists).
   */
  private function loadRequestedNode() {
    $plain = ['Content-Type' => 'text/plain'];
    $raw = \Drupal::request()->query->get('nid');
    if ($raw === NULL || $raw === '' || $raw === []) {
      return new Response("Must supply a NID", 400, $plain);
    }
    // FILTER_VALIDATE_INT rejects "1.5", "1e3" and arrays, which is_numeric()
    // plus intval() would have silently truncated or mangled.
    $nid = filter_var($raw, FILTER_VALIDATE_INT);
    if ($nid === FALSE) {
      $shown = is_scalar($raw) ? $raw : gettype($raw);
      return new Response("NID must be integer, not `$shown`", 400, $plain);
    }
    $node = \Drupal\node\Entity\Node::load($nid);
    if ($node === NULL) {
      return new Response("Node not found", 404, $plain);
    }
    return $node;
  }

  /**
   * Endpoint: nodes close in time to the node given by ?nid=.
   *
   * @return \Symfony\Component\HttpFoundation\Response
   *   JSON {'time': {'nodes': [...]}} on success, plain-text error otherwise.
   */
  public function relatedTime() {
    $node = $this->loadRequestedNode();
    if ($node instanceof Response) {
      return $node;
    }
    if ($this->isFilteredOut($node)) {
      return new Response("Node has no similarity", 404, ['Content-Type' => 'text/plain']);
    }
    $nid = intval($node->id());
    $time = intval($node->getCreatedTime());
    // Up to 20 nodes within 4 days, excluding the reference node itself.
    $near = array_filter(
      $this->queryNearInTime($time, 4, 20),
      function ($candidate) use ($nid): bool {
        return intval($candidate) !== $nid;
      }
    );
    return $this->cachedJsonResp(['time' => $this->presentNids($near)]);
  }

  /**
   * Endpoint stub: not implemented yet, always answers with an error status.
   */
  public function relatedTrx() {
    // TODO: get node time
    // TODO: queryNearInTime($time)
    $nid = \Drupal::request()->query->get('nid');
    if ($nid == NULL) {
      return new Response("Must supply a NID", 400, ['Content-Type' => 'text/plain']);
    }
    $content = "asd $nid";
    return new Response($content, 500, ['Content-Type' => 'text/plain']);
  }

  /**
   * Weighs a taxonomy term: terms used by fewer than 100 nodes count double.
   *
   * The count is memoised because relatedTopic() asks for the same terms once
   * per candidate node.
   *
   * @param int $termid
   *   Taxonomy term id.
   *
   * @return int
   *   2 for terms with fewer than 100 rows in taxonomy_index, 1 otherwise.
   */
  private function getTermWeight(int $termid): int {
    if (!isset($this->termWeights[$termid])) {
      $query = \Drupal::database()->select('taxonomy_index', 'ti');
      $query->fields('ti', ['nid']);
      $query->condition('ti.tid', $termid);
      $count = (int) $query->countQuery()->execute()->fetchField();
      $this->termWeights[$termid] = $count < 100 ? 2 : 1;
    }
    return $this->termWeights[$termid];
  }

  /**
   * Extracts the field_tags target ids from a node's toArray() output.
   *
   * @param array $node_arr
   *   Result of toArray() on a node.
   *
   * @return int[]
   *   Tag ids; empty when the field is missing or has no items.
   */
  private function tagIds(array $node_arr): array {
    if (empty($node_arr['field_tags']) || !is_array($node_arr['field_tags'])) {
      return [];
    }
    return array_map('intval', array_column($node_arr['field_tags'], 'target_id'));
  }

  /**
   * Collects the tags that the similarity map marks as similar to a node's tags.
   *
   * @param array $orig
   *   Result of toArray() on the reference node.
   *
   * @return array
   *   Similar tag id => score, excluding tags the node already carries.
   */
  private function getNodeSimilarTags(array $orig): array {
    $orig_tags = $this->tagIds($orig);
    $similar_tags = [];
    foreach ($orig_tags as $tag) {
      if (!isset($this->similarity[$tag]) || !is_array($this->similarity[$tag])) {
        continue;
      }
      foreach ($this->similarity[$tag] as $similar => $score) {
        // The same similar tag can be reached from several of the node's tags;
        // that is harmless (it only confirms the tag really is similar) and
        // the first score encountered is the one kept.
        if (!in_array((int) $similar, $orig_tags, TRUE) && !array_key_exists($similar, $similar_tags)) {
          $similar_tags[$similar] = $score;
        }
      }
    }
    return $similar_tags;
  }

  /**
   * Scores how strongly two nodes are related through their tags.
   *
   * @param array $orig
   *   toArray() output of the reference node.
   * @param array $other
   *   toArray() output of the candidate node.
   * @param array|null $similar_orig_tags
   *   Optional precomputed getNodeSimilarTags($orig) result, so callers that
   *   compare one node against many do not recompute it every time.
   *
   * @return float
   *   Sum of the weights of shared tags plus, for each candidate tag that is
   *   merely similar to a reference tag, its weight scaled by the similarity
   *   score. 0.0 when either node has no tags.
   */
  private function getSimilarity(array $orig, array $other, $similar_orig_tags = NULL): float {
    $orig_tags = $this->tagIds($orig);
    $other_tags = $this->tagIds($other);
    if (!$orig_tags || !$other_tags) {
      return 0.0;
    }

    $score = 0.0;
    foreach (array_intersect($orig_tags, $other_tags) as $tid) {
      $score += $this->getTermWeight($tid);
    }

    // Take every tag similar to the reference node's tags (from the JSON file)
    // and check whether the candidate carries it.
    if ($similar_orig_tags === NULL) {
      $similar_orig_tags = $this->getNodeSimilarTags($orig);
    }
    foreach (array_intersect(array_keys($similar_orig_tags), $other_tags) as $tid) {
      // A similar term does not count as much as an identical one: scale its
      // weight by the score found in the JSON file.
      $score += $this->getTermWeight((int) $tid) * (float) $similar_orig_tags[$tid];
    }
    return $score;
  }

  /**
   * Wraps data in a JSON response that is publicly cacheable for three hours.
   */
  private function cachedJsonResp(array $data): Response {
    return new JsonResponse($data, 200, ['cache-control' => 'public, max-age=10800']);
  }

  /**
   * Tells whether related-content lookups are refused for this node.
   *
   * @return bool
   *   TRUE for nodes of type 'page'.
   */
  private function isFilteredOut(NodeInterface $node) {
    return $node->getType() === 'page';
  }

  /**
   * Reads the tag similarity map from ../temp/similarity.json.
   *
   * The path is relative, so it is resolved against the process working
   * directory. Problems are logged and reported as an empty map.
   *
   * @return array
   *   The decoded map, or [] when the file is missing or is not a JSON
   *   object/array.
   */
  private function loadSimilarity(): array {
    $json_path = '../temp/similarity.json';
    if (!file_exists($json_path)) {
      // realpath() yields FALSE for a missing file, so log the raw path.
      \Drupal::logger('related_ror')->info('file JSON non trovato in ' . $json_path);
      return [];
    }
    $decoded = json_decode((string) file_get_contents($json_path), TRUE);
    if (!is_array($decoded)) {
      \Drupal::logger('related_ror')->error('errore ' . json_last_error() . ' nel leggere file json ' . realpath($json_path));
      return [];
    }
    return $decoded;
  }

  /**
   * Endpoint: nodes sharing (or nearly sharing) tags with the node in ?nid=.
   *
   * Candidates are the up-to-100 nodes within 30 days of the reference node's
   * creation time; those with a positive similarity are returned best first.
   *
   * @return \Symfony\Component\HttpFoundation\Response
   *   JSON {'topic': {'nodes': [...]}} on success, plain-text error otherwise.
   */
  public function relatedTopic() {
    $node = $this->loadRequestedNode();
    if ($node instanceof Response) {
      return $node;
    }
    if ($this->isFilteredOut($node)) {
      return new Response("Node similarity disallowed", 403, ['Content-Type' => 'text/plain']);
    }

    $this->similarity = $this->loadSimilarity();

    $nid = intval($node->id());
    $orig_arr = $node->toArray();
    $time = intval($node->getCreatedTime());
    // Loop-invariant: depends only on the reference node.
    $similar_orig_tags = $this->getNodeSimilarTags($orig_arr);

    $candidates = \Drupal\node\Entity\Node::loadMultiple($this->queryNearInTime($time, 30, 100));
    $scores = [];
    foreach ($candidates as $other_nid => $other_node) {
      if (intval($other_nid) === $nid) {
        continue;
      }
      $score = $this->getSimilarity($orig_arr, $other_node->toArray(), $similar_orig_tags);
      if ($score > 0) {
        $scores[$other_nid] = $score;
      }
    }
    arsort($scores);

    return $this->cachedJsonResp(['topic' => $this->presentNids(array_keys($scores))]);
  }

}
|