['pipe', 'r'], 1=>['pipe', 'w']], $pipes); fwrite($pipes[0], $html); fclose($pipes[0]); $out = stream_get_contents($pipes[1]); fclose($pipes[1]); return trim(html_entity_decode($out)); }

/**
 * Extract metadata (publication date, author) from an article's HTML body.
 *
 * Returns an empty array when the PUP_BIN environment variable is not set.
 * Otherwise runs two selectors over the HTML through pup_selector() and
 * returns whichever of the keys 'published' and 'author' could be determined.
 *
 * @param string $html HTML fragment to inspect.
 *
 * @return array Map that may contain 'published' and/or 'author'.
 */
function extract_metadata_from_html(string $html): array
{
    if (getenv("PUP_BIN") === false) {
        return [];
    }

    $meta = [];

    // NOTE(review): the format mixes %H (24-hour clock) with %p (AM/PM);
    // presumably %I was intended - confirm against the actual page markup.
    $date = strptimestamp(
        pup_selector($html, ' table td.titoloFTR .small i text{}'),
        "%A, %b. %d, %Y at %H:%M %p"
    );
    if ($date !== false) {
        // presumably $date is a Unix timestamp; '%s' renders it as epoch seconds.
        $meta['published'] = strftime('%s', $date);
    }

    $author = pup_selector($html, ' table td.titoloFTR .small strong text{}');
    if ($author !== '') {
        $meta['author'] = $author;
    }

    return $meta;
}

/**
 * Parse one legacy PHP page and store it as a row in the news table.
 *
 * The numeric id is taken from the file name (basename without ".php"); a
 * default publication date is derived from the two directory names directly
 * above the file, read as ".../<year>/<month>/<nid>.php" and resolved to the
 * first day of that month. Top-level `$GLOBALS[...] = ...` assignments become
 * metadata entries, inline HTML is concatenated into the body. Both string
 * values and HTML are converted from ISO-8859-1 to UTF-8. Metadata extracted
 * from the HTML itself overrides the values collected before it.
 *
 * Files that cannot be read or parsed are reported on stdout and skipped, so
 * an existing row is never replaced by an empty one.
 *
 * @param object $db       Database handle, passed through to save().
 * @param string $filename Path of the PHP file to import.
 * @param mixed  $parser   Object exposing parse(string) that returns a list of AST nodes.
 *
 * @return void
 */
function parse_save(object $db, string $filename, $parser)
{
    try {
        $source = file_get_contents($filename);
        if ($source === false) {
            // Do not hand `false` to the parser: it would be treated as empty
            // source and an empty record would be written for this nid.
            echo "Cannot read $filename\n";
            return;
        }
        $ast = $parser->parse($source);
    } catch (Error $error) {
        echo "Parse error on $filename: {$error->getMessage()}\n";
        return;
    }

    $nid = intval(basename($filename, '.php'));

    // Default publication date: first day of <month>/<year> taken from the path.
    $fparts = explode('/', $filename);
    $depth = count($fparts);
    $dt = null;
    if ($depth >= 3) {
        $year = $fparts[$depth - 3];
        $month = $fparts[$depth - 2];
        $parsed = strtotime("01-$month-$year");
        if ($parsed !== false) {
            $dt = $parsed;
        }
    }

    $metadata = ['nid' => $nid, 'published' => $dt];
    $body = '';

    foreach ($ast as $part) {
        if ($part instanceof PhpParser\Node\Stmt\InlineHTML) {
            $body .= iconv('ISO-8859-1', 'UTF-8', $part->value);
            continue;
        }
        if (!($part instanceof PhpParser\Node\Stmt\Expression)) {
            continue;
        }

        // Only `$GLOBALS[<key>] = <value>;` statements carry metadata.
        $assign = $part->expr;
        if (!($assign instanceof PhpParser\Node\Expr\Assign
            && $assign->var instanceof PhpParser\Node\Expr\ArrayDimFetch
            && $assign->var->var instanceof PhpParser\Node\Expr\Variable
            && $assign->var->var->name === 'GLOBALS')
        ) {
            continue;
        }

        // `$GLOBALS[] = ...` has no dim, and a non-literal dim has no value:
        // neither yields a usable metadata key.
        $key = $assign->var->dim->value ?? null;
        if (!is_scalar($key)) {
            continue;
        }

        $value = $assign->expr;
        if ($value instanceof PhpParser\Node\Expr\Array_) {
            $val = $value->items;
        } elseif ($value instanceof PhpParser\Node\Scalar\String_) {
            $val = iconv('ISO-8859-1', 'UTF-8', $value->value);
        } else {
            // Nodes without a literal value (calls, constants, ...) map to null.
            $val = $value->value ?? null;
        }
        $metadata[$key] = $val;
    }

    $metadata = array_merge($metadata, extract_metadata_from_html($body));
    save($db, $body, $metadata);
}

/**
 * Insert a news record, replacing any existing row with the same nid.
 *
 * Reads the keys 'nid', 'page_title', 'author', 'page_display' and
 * 'published' from $metadata; missing keys are stored as NULL. Failures to
 * prepare or execute the statement are reported on stdout.
 *
 * @param mixed $db       Database handle exposing prepare() and errorInfo().
 * @param mixed $body     Article body.
 * @param mixed $metadata Metadata map for the record.
 *
 * @return void
 */
function save($db, $body, $metadata)
{
    $stm = $db->prepare(
        'INSERT OR REPLACE INTO news(nid, title, author, body, display, published) VALUES (?,?,?,?,?,?)'
    );
    if ($stm === false) {
        print("error during INSERT: ");
        print($db->errorInfo()[2]);
        print("\n");
        return;
    }

    $stm->bindValue(1, $metadata['nid'] ?? null);
    $stm->bindValue(2, $metadata['page_title'] ?? null);
    $stm->bindValue(3, $metadata['author'] ?? null);
    $stm->bindValue(4, $body);
    $stm->bindValue(5, $metadata['page_display'] ?? null);
    $stm->bindValue(6, $metadata['published'] ?? null);

    if ($stm->execute() === false) {
        print("error during INSERT: ");
        print($stm->errorInfo()[2]);
        print("\n");
    }
}

// ---------------------------------------------------------------------------
// Entry point: the SQLite database path is the first CLI argument, the list of
// files to import is read from STDIN, one path per line.
// ---------------------------------------------------------------------------
if (!isset($argv[1])) {
    // Without a path PDO would be handed the bare DSN 'sqlite:' instead of a file.
    fwrite(STDERR, 'Usage: php ' . ($argv[0] ?? 'import.php') . " <sqlite-db-file> < file-list\n");
    exit(1);
}

$db = new PDO('sqlite:' . $argv[1]);
$db->exec('CREATE TABLE IF NOT EXISTS news (nid PRIMARY KEY, body TEXT, author VARCHAR, title VARCHAR, display VARCHAR, published INTEGER, last_modified INTEGER);');
$db->exec('CREATE INDEX IF NOT EXISTS news_published ON news (published);');
$db->exec('CREATE INDEX IF NOT EXISTS news_author ON news (author);');

$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP5);

// Commit in batches of 100 files.
$pending = 0;
$db->beginTransaction();
while (($line = fgets(STDIN)) !== false) {
    // Strip the line terminator (LF or CRLF) and ignore blank lines.
    $filename = rtrim($line, "\r\n");
    if ($filename === '') {
        continue;
    }

    parse_save($db, $filename, $parser);

    $pending++;
    if ($pending >= 100) {
        $db->commit();
        $db->beginTransaction();
        $pending = 0;
    }
}
$db->commit();
$db = null;
exit(0);