From 2226a2e8990d5968e72aba8b46379859577acce6 Mon Sep 17 00:00:00 2001
From: boyska
Date: Mon, 15 Oct 2018 08:18:59 +0200
Subject: [PATCH] initial commit

---
Review notes (text in this section is ignored by git am):

* The opening of RelatedRorController.php (open tag, namespace, use lines,
  class header, constructor signature) was missing from the reviewed copy and
  is reconstructed below; confirm it against the repository.
* The blob id on the RelatedRorController.php index line predates the edits
  listed here.
* nodeToLinkdata: truncate with mb_substr() and tolerate nodes without body
  or path alias.
* queryNearInTime: return [] instead of NULL, bind $time as a placeholder,
  drop the dead entityQuery() assignment.
* getTermWeight: use the $termid parameter, drop the load of a non-existent
  Term class, count with countQuery() instead of rowCount().
* getSimilarity: call getTermWeight() through a closure.
* cachedJsonResp: keep the cache settings out of the JSON body.
* relatedTime / relatedTopic: share the nid validation, drop the write to an
  undefined variable.

 .gitignore                              |   1 +
 README.md                               |   1 +
 related_ror.info.yml                    |   9 ++
 related_ror.routing.yml                 |  27 ++++
 src/Controller/RelatedRorController.php | 280 ++++++++++++++++++++++++
 5 files changed, 318 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 related_ror.info.yml
 create mode 100644 related_ror.routing.yml
 create mode 100644 src/Controller/RelatedRorController.php

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..726c3ca
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.*.sw*
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..012d8eb
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+TODO: block using https://drupal.stackexchange.com/a/245759
diff --git a/related_ror.info.yml b/related_ror.info.yml
new file mode 100644
index 0000000..8a1890d
--- /dev/null
+++ b/related_ror.info.yml
@@ -0,0 +1,9 @@
+name: Podcast per ROR
+type: module
+description: 'Podcast specifici per radio ondarossa'
+core: 8.x
+version: 0.1
+package: Ondarossa
+dependencies:
+  - field
+  - link
diff --git a/related_ror.routing.yml b/related_ror.routing.yml
new file mode 100644
index 0000000..cc0e146
--- /dev/null
+++ b/related_ror.routing.yml
@@ -0,0 +1,27 @@
+related_ror.time:
+  path: '/related/time.json'
+  defaults:
+    _controller: '\Drupal\related_ror\Controller\RelatedRorController::relatedTime'
+    _title: 'Get near-in-time articles'
+  requirements:
+    _access: 'TRUE'
+  options:
+    no_cache: 'TRUE'
+related_ror.trx:
+  path: '/related/trx.json'
+  defaults:
+    _controller: '\Drupal\related_ror\Controller\RelatedRorController::relatedTrx'
+    _title: 'Post from the same show'
+  requirements:
+    _access: 'TRUE'
+  options:
+    no_cache: 'TRUE'
+related_ror.topic:
+  path: '/related/topic.json'
+  defaults:
+    _controller: '\Drupal\related_ror\Controller\RelatedRorController::relatedTopic'
+    _title: 'Posts with similar topic'
+  requirements:
+    _access: 'TRUE'
+  options:
+    no_cache: 'TRUE'
diff --git a/src/Controller/RelatedRorController.php b/src/Controller/RelatedRorController.php
new file mode 100644
index 0000000..39f8de6
--- /dev/null
+++ b/src/Controller/RelatedRorController.php
@@ -0,0 +1,280 @@
+<?php
+
+namespace Drupal\related_ror\Controller;
+
+use Drupal\Core\Cache\CacheableJsonResponse;
+use Drupal\Core\Cache\CacheableMetadata;
+use Drupal\Core\Controller\ControllerBase;
+use Symfony\Component\DependencyInjection\ContainerInterface;
+use Symfony\Component\HttpFoundation\Response;
+
+/**
+ * JSON endpoints listing the nodes related to a given node.
+ *
+ * NOTE(review): everything from the opening tag down to the constructor
+ * signature was missing from the reviewed copy of this patch. It is
+ * reconstructed from the routing file and from the names the methods use;
+ * confirm it against the repository before applying.
+ */
+class RelatedRorController extends ControllerBase {
+
+  /**
+   * The 'state' service handed over by create().
+   */
+  protected $state;
+
+  /**
+   * The 'twig' service handed over by create().
+   */
+  protected $twig;
+
+  /**
+   * Stores the injected services.
+   */
+  public function __construct($state, $twig) {
+    $this->state = $state;
+    $this->twig = $twig;
+  }
+
+  /**
+   * Builds the controller with the 'state' and 'twig' services.
+   */
+  public static function create(ContainerInterface $container) {
+    return new static(
+      $container->get('state'),
+      $container->get('twig')
+    );
+  }
+
+  /**
+   * Flattens a node into the small array exposed by the JSON endpoints.
+   *
+   * @param object $node_entity
+   *   Entity whose toArray() provides nid, title, body and path.
+   *
+   * @return array
+   *   Keys nid, title, summary (tag-free, XML-escaped, at most 3500
+   *   characters) and url (the path alias, NULL when there is none).
+   */
+  private function nodeToLinkdata($node_entity) {
+    $arr = $node_entity->toArray();
+    // A node without body must not raise "undefined offset" notices.
+    $body = $arr['body'][0]['value'] ?? '';
+    $text = html_entity_decode(strip_tags($body));
+    // Cut on characters, not bytes: a byte-wise substr() can split a multibyte
+    // character, and htmlspecialchars() returns '' for invalid UTF-8 input.
+    $summary = htmlspecialchars(mb_substr($text, 0, 3500, 'UTF-8'), ENT_XML1, 'UTF-8');
+    return [
+      'nid' => intval($arr['nid'][0]['value']),
+      'title' => $arr['title'][0]['value'],
+      'summary' => $summary,
+      'url' => $arr['path'][0]['alias'] ?? NULL,
+    ];
+  }
+
+  /**
+   * Finds published nodes whose field_tx_date is close to a timestamp.
+   *
+   * @param int $time
+   *   Reference Unix timestamp.
+   * @param int $days
+   *   Half-width of the search window, in days.
+   * @param int $limit
+   *   Maximum number of rows; 0 means no limit.
+   *
+   * @return array
+   *   Node IDs ordered by increasing distance from $time; empty when the
+   *   query fails its pre-execute step.
+   */
+  private function queryNearInTime(int $time, int $days = 7, int $limit = 0): array {
+    $types = ['ror_news', 'redazionali', 'news_trasmissioni'];
+    $window = 3600 * 24 * $days;
+    $query = \Drupal::database()->select('node', 'n');
+    $query->addJoin('INNER', 'node__field_tx_date', 'dt', 'n.nid=dt.entity_id');
+    $query->addJoin('INNER', 'node_field_data', 'field', 'n.nid=field.nid');
+    $query->addField('n', 'nid');
+    $query->addExpression('CAST(UNIX_TIMESTAMP(dt.field_tx_date_value) as UNSIGNED)', 'ts');
+    // $time is bound as a placeholder rather than pasted into the SQL text.
+    $query->addExpression('ABS(CAST(UNIX_TIMESTAMP(dt.field_tx_date_value) as SIGNED) - :time)', 'dist', [':time' => $time]);
+    $query->condition('field.status', '1');
+    $query->condition('n.type', $types, 'IN');
+    $query->where('CAST(UNIX_TIMESTAMP(dt.field_tx_date_value) as UNSIGNED) > :from', [':from' => $time - $window]);
+    $query->where('CAST(UNIX_TIMESTAMP(dt.field_tx_date_value) as UNSIGNED) < :to', [':to' => $time + $window]);
+    $query->orderBy('dist', 'ASC');
+    if ($limit > 0) {
+      $query->range(0, $limit);
+    }
+    if ($query->preExecute() !== TRUE) {
+      // The declared return type is array, so returning NULL was a TypeError.
+      return [];
+    }
+    return $query->execute()->fetchCol(0);
+  }
+
+  /**
+   * Loads the given nodes and wraps their link data for the JSON payload.
+   *
+   * @param array $nids
+   *   Node IDs to load.
+   *
+   * @return array
+   *   ['nodes' => list of nodeToLinkdata() results].
+   */
+  private function presentNids(array $nids) {
+    $data = ['nodes' => []];
+    foreach (\Drupal\node\Entity\Node::loadMultiple($nids) as $node) {
+      $data['nodes'][] = $this->nodeToLinkdata($node);
+    }
+    return $data;
+  }
+
+  /**
+   * Loads the node named by the "nid" query parameter.
+   *
+   * @return \Drupal\node\Entity\Node|\Symfony\Component\HttpFoundation\Response
+   *   The node, or the 400 text/plain response to send back when the
+   *   parameter is missing, not numeric, or names no node.
+   */
+  private function requestedNodeOrError() {
+    $plain = ['Content-Type' => 'text/plain'];
+    $nid = \Drupal::request()->query->get('nid');
+    if ($nid == NULL) {
+      return new Response("Must supply a NID", 400, $plain);
+    }
+    if (!is_numeric($nid)) {
+      return new Response("NID must be integer, not `$nid`", 400, $plain);
+    }
+    $node = \Drupal\node\Entity\Node::load(intval($nid));
+    if ($node == NULL) {
+      return new Response("Node not found", 400, $plain);
+    }
+    return $node;
+  }
+
+  /**
+   * Responds with up to 20 nodes dated within 4 days of the requested node.
+   */
+  public function relatedTime() {
+    $node = $this->requestedNodeOrError();
+    if ($node instanceof Response) {
+      return $node;
+    }
+    $nid = intval($node->id());
+    $time = intval($node->getCreatedTime());
+    $near = array_filter(
+      $this->queryNearInTime($time, 4, 20),
+      function ($candidate) use ($nid): bool {
+        return intval($candidate) !== $nid;
+      }
+    );
+    return $this->cachedJsonResp(['time' => $this->presentNids($near)]);
+  }
+
+  /**
+   * Placeholder for "posts from the same show"; not implemented yet.
+   */
+  public function relatedTrx() {
+    // TODO: get node time
+    // TODO: queryNearInTime($time)
+    $nid = \Drupal::request()->query->get('nid');
+    if ($nid == NULL) {
+      return new Response("Must supply a NID", 400, ['Content-Type' => 'text/plain']);
+    }
+    return new Response("asd $nid", 500, ['Content-Type' => 'text/plain']);
+  }
+
+  /**
+   * Weighs a taxonomy term: terms tagged on fewer than 100 nodes count double.
+   *
+   * @param int $termid
+   *   Taxonomy term ID.
+   *
+   * @return int
+   *   2 for terms with fewer than 100 taxonomy_index rows, 1 otherwise.
+   */
+  private function getTermWeight(int $termid): int {
+    // relatedTopic() asks for the same terms repeatedly within one request.
+    static $weights = [];
+    if (!isset($weights[$termid])) {
+      $query = \Drupal::database()->select('taxonomy_index', 'ti');
+      $query->fields('ti', ['nid']);
+      $query->condition('ti.tid', $termid);
+      // Count in SQL: rowCount() is not supported for SELECT statements.
+      $count = intval($query->countQuery()->execute()->fetchField());
+      $weights[$termid] = $count < 100 ? 2 : 1;
+    }
+    return $weights[$termid];
+  }
+
+  /**
+   * Scores two nodes by the summed weight of the tags they share.
+   *
+   * @param array $orig
+   *   toArray() output of the reference node.
+   * @param array $other
+   *   toArray() output of the candidate node.
+   *
+   * @return int
+   *   0 when the nodes share no field_tags term.
+   */
+  private function getSimilarity(array $orig, array $other): int {
+    $tag_ids = function (array $node_arr): array {
+      // A missing field_tags key means there are no tags to compare.
+      return array_column($node_arr['field_tags'] ?? [], 'target_id');
+    };
+    $shared = array_intersect($tag_ids($orig), $tag_ids($other));
+    // $this->getTermWeight was a property read, not a callable.
+    $weights = array_map(function ($tid): int {
+      return $this->getTermWeight(intval($tid));
+    }, $shared);
+    return array_sum($weights);
+  }
+
+  /**
+   * Wraps data in a JSON response cacheable for 600 seconds per URL.
+   *
+   * @param array $data
+   *   Payload to encode.
+   *
+   * @return \Drupal\Core\Cache\CacheableJsonResponse
+   *   The response carrying the cacheability metadata.
+   */
+  private function cachedJsonResp(array $data) {
+    // Keep the cache settings out of $data so no '#cache' key is emitted in
+    // the JSON body.
+    $metadata = new CacheableMetadata();
+    $metadata->setCacheMaxAge(600);
+    $metadata->addCacheContexts(['url']);
+    $resp = new CacheableJsonResponse($data);
+    $resp->addCacheableDependency($metadata);
+    return $resp;
+  }
+
+  /**
+   * Responds with nodes dated within 30 days that share tags, best first.
+   */
+  public function relatedTopic() {
+    $node = $this->requestedNodeOrError();
+    if ($node instanceof Response) {
+      return $node;
+    }
+    $nid = intval($node->id());
+    $orig_arr = $node->toArray();
+    $time = intval($node->getCreatedTime());
+
+    $scores = [];
+    $candidates = \Drupal\node\Entity\Node::loadMultiple($this->queryNearInTime($time, 30, 100));
+    foreach ($candidates as $other_nid => $other_node) {
+      if (intval($other_nid) === $nid) {
+        continue;
+      }
+      $score = $this->getSimilarity($orig_arr, $other_node->toArray());
+      if ($score > 0) {
+        $scores[$other_nid] = $score;
+      }
+    }
+    arsort($scores);
+    return $this->cachedJsonResp(['topic' => $this->presentNids(array_keys($scores))]);
+  }
+
+}