Convertir un flux RSS en flux WXR (WordPress eXtended RSS)
Afin de faciliter l'importation d'articles ou la migration d'un blog vers WordPress, voici un petit script qui convertit un flux RSS en flux WordPress eXtended RSS (WXR), ce qui facilite l'importation des données dans WP.
Exemple d'utilisation : (Verrouillé)
<?php
// Convertit un flux RSS au format WXR (WordPress)
// pour l'importation de blog / flux de news, etc…
// (c) Luca Lo Valvo
// Last Update: Juillet 2011
// ==== Read the request parameters ====
// URL of the feed to parse. A missing or non-string parameter becomes '' so
// that no "undefined index" notice is raised.
$urlflux = (isset($_REQUEST['urlflux']) && is_string($_REQUEST['urlflux']))
    ? trim($_REQUEST['urlflux'])
    : '';
// Author name; usually not available in the RSS feed itself.
$auteur = (isset($_REQUEST['auteur']) && is_string($_REQUEST['auteur']))
    ? trim($_REQUEST['auteur'])
    : '';

// SECURITY: $urlflux is later handed to file_get_contents(), which also
// accepts local paths and stream wrappers (file://, php://, ...). Only
// http(s) URLs are accepted so the script cannot be used to read local files.
if (!preg_match('@^https?://@i', $urlflux)) {
    header('HTTP/1.1 400 Bad Request');
    exit('Parameter "urlflux" must be an http(s) URL.');
}
// ==== FUNCTION =====
// Slug
/**
 * Builds an ASCII slug from a title.
 *
 * Hyphens, en dashes and apostrophes become underscores, HTML entities are
 * decoded, French accented letters are transliterated to their base letter
 * (always lowercase, as in the original replacement table), spaces become
 * underscores and every remaining character outside [a-zA-Z0-9_] is dropped.
 *
 * The text is processed as UTF-8. The previous version converted it to
 * Latin-1 with utf8_decode() and then matched it against UTF-8 character
 * classes without the "u" modifier, so accented letters were removed instead
 * of being transliterated.
 *
 * @param mixed $word Title to convert; anything castable to string
 *                    (SimpleXMLElement nodes included).
 * @return string The slug, possibly empty.
 */
function slug($word) {
    $word = (string) $word;

    // Input that is not valid UTF-8 is assumed to be ISO-8859-1 and converted,
    // because the "u" patterns below reject invalid UTF-8.
    if (!mb_check_encoding($word, 'UTF-8')) {
        $word = mb_convert_encoding($word, 'UTF-8', 'ISO-8859-1');
    }

    $word = str_replace(array('-', '–', "'"), '_', $word);
    $word = html_entity_decode($word, ENT_QUOTES, 'UTF-8');

    $search = array(
        '/[éèêë]/iu',
        '/[àâä]/iu',
        '/[îï]/iu',
        '/[ûùü]/iu',
        '/[ôö]/iu',
        '/ç/iu',
        '/ /',
        '/[^a-zA-Z0-9_]/',
    );
    $replace = array('e', 'a', 'i', 'u', 'o', 'c', '_', '');

    $slug = preg_replace($search, $replace, $word);

    // preg_replace() returns null on a PCRE error; keep the string return type.
    return $slug === null ? '' : $slug;
}
// ==== RSS PARSER ====
// Downloads the feed, reads the channel information and builds, in $corp, one
// WXR <item> per feed entry plus one attachment <item> per <img> found in the
// entry's description.

/**
 * Escapes a value for use as XML text or attribute content.
 *
 * @param mixed $value Anything castable to string.
 * @return string
 */
function wxr_escape($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * Wraps a value in a CDATA section.
 *
 * A literal "]]>" inside the value would close the section early, so it is
 * split across two consecutive CDATA sections.
 *
 * @param mixed $value Anything castable to string.
 * @return string
 */
function wxr_cdata($value) {
    return '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', (string) $value) . ']]>';
}

// Fetch the feed; file_get_contents() returns false on failure.
$xml = file_get_contents($urlflux);
if ($xml === false) {
    header('HTTP/1.1 502 Bad Gateway');
    exit('Unable to download the feed.');
}

// Remove every "im:" occurrence (kept from the original script; presumably it
// strips an "im:" namespace prefix so those elements can be read without
// namespace handling - TODO confirm, it also alters matching body text).
$xml = str_replace("im:", "", $xml);

// Collect libxml errors instead of emitting PHP warnings on malformed XML.
libxml_use_internal_errors(true);
$root = simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NOCDATA);
if ($root === false || !isset($root->channel)) {
    header('HTTP/1.1 502 Bad Gateway');
    exit('The feed is not a valid RSS 2.0 document.');
}

// Basic channel information (raw, unescaped text).
$FluxTitle = (string) $root->channel->title;
$FluxLink = (string) $root->channel->link;
$FluxDescription = (string) $root->channel->description;
$FluxPubDate = (string) $root->channel->pubDate;

$corp = '';
// Single counter shared by posts and attachments so that every generated
// <item> gets its own wp:post_id (attachments used to reuse their post's id).
$id = 1;
$creator = wxr_escape($auteur);

foreach ($root->channel->item as $item) {
    // Item data, cast from SimpleXMLElement to plain strings.
    $title = (string) $item->title;
    $link = (string) $item->link;
    $description = (string) $item->description;
    $pubDate = (string) $item->pubDate;

    $slug = slug($title);
    $postId = $id++;

    // RSS dates are RFC 822; wp:post_date is written as "Y-m-d H:i:s".
    // Fall back to the current time when the feed date cannot be parsed.
    $timestamp = strtotime($pubDate);
    if ($timestamp === false) {
        $timestamp = time();
    }
    $postDate = date('Y-m-d H:i:s', $timestamp);
    $postDateGmt = gmdate('Y-m-d H:i:s', $timestamp);

    // WXR entry for the post itself.
    $corp .= "<item>\n";
    $corp .= "<title>" . wxr_escape($title) . "</title>\n";
    $corp .= "<link>" . wxr_escape($link) . "</link>\n";
    $corp .= "<pubDate>" . wxr_escape($pubDate) . "</pubDate>\n";
    $corp .= "<dc:creator>$creator</dc:creator>\n";
    $corp .= "<guid isPermaLink=\"false\">" . wxr_escape($link) . "</guid>\n";
    $corp .= "<description></description>\n";
    $corp .= "<content:encoded>" . wxr_cdata($description) . "</content:encoded>\n";
    $corp .= "<excerpt:encoded><![CDATA[]]></excerpt:encoded>\n";
    $corp .= "<wp:post_id>$postId</wp:post_id>\n";
    $corp .= "<wp:post_date>$postDate</wp:post_date>\n";
    $corp .= "<wp:post_date_gmt>$postDateGmt</wp:post_date_gmt>\n";
    $corp .= "<wp:comment_status>open</wp:comment_status>\n";
    $corp .= "<wp:ping_status>open</wp:ping_status>\n";
    $corp .= "<wp:post_name>" . wxr_escape($slug) . "</wp:post_name>\n";
    $corp .= "<wp:status>publish</wp:status>\n";
    $corp .= "<wp:post_parent>0</wp:post_parent>\n";
    $corp .= "<wp:menu_order>0</wp:menu_order>\n";
    $corp .= "<wp:post_type>post</wp:post_type>\n";
    $corp .= "<wp:post_password></wp:post_password>\n";
    $corp .= "<wp:is_sticky>0</wp:is_sticky>\n";
    $corp .= "<category domain=\"category\" nicename=\"Home\"><![CDATA[Home]]></category>\n";
    $corp .= "<wp:postmeta>\n";
    $corp .= "<wp:meta_key>_edit_last</wp:meta_key>\n";
    $corp .= "<wp:meta_value><![CDATA[1]]></wp:meta_value>\n";
    $corp .= "</wp:postmeta>\n";
    $corp .= "</item>\n\n";

    // Extract every <img> tag from the description.
    $images = array();
    preg_match_all('/<img[^>]+>/i', $description, $images);

    foreach ($images[0] as $imgTag) {
        // Read the quoted src value; skip tags that have none.
        $srcMatch = array();
        if (!preg_match('/src=(["\'])(.*?)\1/i', $imgTag, $srcMatch) || $srcMatch[2] === '') {
            continue;
        }
        // Attribute values are HTML-encoded (e.g. "&amp;"); decode to get the real URL.
        $imgUrl = html_entity_decode($srcMatch[2], ENT_QUOTES, 'UTF-8');

        // Lifetype-specific: local images are addressed as "...resource=<file>&...".
        if (strpos($imgUrl, 'resource=') === false) {
            // Non-local image: the file name is the last path segment.
            $segments = explode('/', $imgUrl);
            $filename = end($segments);
        } else {
            // Local image: the file name sits between "resource=" and the next "&".
            $afterResource = explode('resource=', $imgUrl);
            $resourceParts = explode('&', $afterResource[1]);
            $filename = $resourceParts[0];
        }

        // Title = file name up to the first "." or ":".
        $filetitle = substr($filename, 0, strcspn($filename, '.:'));
        $attachmentId = $id++;
        $escapedUrl = wxr_escape($imgUrl);

        // Placeholder attachment metadata (dimensions are the fixed values of
        // the original script). serialize() computes the string lengths, which
        // were hard-coded - and therefore wrong - before.
        $metadata = serialize(array(
            'width' => '621',
            'height' => '594',
            'hwstring_small' => "height='96' width='100'",
            'file' => date('Y/m', $timestamp) . '/' . $filetitle . '.jpg',
            'sizes' => array(
                'thumbnail' => array(
                    'file' => $filetitle . '-150x150.jpg',
                    'width' => '150',
                    'height' => '150',
                ),
                'medium' => array(
                    'file' => $filetitle . '-300x286.jpg',
                    'width' => '300',
                    'height' => '286',
                ),
            ),
            'image_meta' => array(
                'aperture' => '0',
                'credit' => '',
                'camera' => '',
                'caption' => '',
                'created_timestamp' => '0',
                'copyright' => '',
                'focal_length' => '0',
                'iso' => '0',
                'shutter_speed' => '0',
                'title' => '',
            ),
        ));

        // WXR entry for the attachment; it is dated like the post it came from.
        $corp .= "<item>\n";
        $corp .= "<title>" . wxr_escape($filetitle) . "</title>\n";
        $corp .= "<link>$escapedUrl</link>\n";
        $corp .= "<pubDate>" . wxr_escape($pubDate) . "</pubDate>\n";
        $corp .= "<dc:creator>$creator</dc:creator>\n";
        $corp .= "<guid isPermaLink=\"false\">$escapedUrl</guid>\n";
        $corp .= "<description></description>\n";
        $corp .= "<content:encoded><![CDATA[]]></content:encoded>\n";
        $corp .= "<excerpt:encoded><![CDATA[]]></excerpt:encoded>\n";
        $corp .= "<wp:post_id>$attachmentId</wp:post_id>\n";
        $corp .= "<wp:post_date>$postDate</wp:post_date>\n";
        $corp .= "<wp:post_date_gmt>$postDateGmt</wp:post_date_gmt>\n";
        $corp .= "<wp:comment_status>open</wp:comment_status>\n";
        $corp .= "<wp:ping_status>open</wp:ping_status>\n";
        $corp .= "<wp:post_name>" . wxr_escape($filetitle) . "</wp:post_name>\n";
        $corp .= "<wp:status>inherit</wp:status>\n";
        $corp .= "<wp:post_parent>0</wp:post_parent>\n";
        $corp .= "<wp:menu_order>0</wp:menu_order>\n";
        $corp .= "<wp:post_type>attachment</wp:post_type>\n";
        $corp .= "<wp:post_password></wp:post_password>\n";
        $corp .= "<wp:is_sticky>0</wp:is_sticky>\n";
        $corp .= "<wp:attachment_url>$escapedUrl</wp:attachment_url>\n";
        $corp .= "<wp:postmeta>\n";
        $corp .= " <wp:meta_key>_wp_attachment_metadata</wp:meta_key>\n";
        $corp .= " <wp:meta_value>" . wxr_cdata($metadata) . "</wp:meta_value>\n";
        $corp .= "</wp:postmeta>\n";
        $corp .= "<wp:postmeta>\n";
        $corp .= " <wp:meta_key>_wp_attached_file</wp:meta_key>\n";
        $corp .= " <wp:meta_value>" . wxr_cdata($filename) . "</wp:meta_value>\n";
        $corp .= "</wp:postmeta>\n";
        $corp .= "</item>\n\n";
    }
}
?>
<?php
// ==== BUILD THE WXR DOCUMENT ====
// Assembles the XML header (channel, author and category declarations), the
// items accumulated in $corp and the closing tags, then prints the result.

// Feed- and request-derived values are escaped before being placed in XML
// text nodes; an unescaped "&" or "<" would make the document ill-formed.
$wxrFlags = ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE;
$wxrTitle = htmlspecialchars((string) $FluxTitle, $wxrFlags, 'UTF-8');
$wxrLink = htmlspecialchars((string) $FluxLink, $wxrFlags, 'UTF-8');
$wxrDescription = htmlspecialchars((string) $FluxDescription, $wxrFlags, 'UTF-8');
$wxrAuthor = htmlspecialchars((string) $auteur, $wxrFlags, 'UTF-8');
// Inside CDATA only the "]]>" terminator needs care: split it across two sections.
$wxrAuthorCdata = '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', (string) $auteur) . ']]>';

$header = '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
$header .= '<rss version="2.0"' . "\n"
    . ' xmlns:excerpt="http://wordpress.org/export/1.1/excerpt/"' . "\n"
    . ' xmlns:content="http://purl.org/rss/1.0/modules/content/"' . "\n"
    . ' xmlns:wfw="http://wellformedweb.org/CommentAPI/"' . "\n"
    . ' xmlns:dc="http://purl.org/dc/elements/1.1/"' . "\n"
    . ' xmlns:wp="http://wordpress.org/export/1.1/"' . "\n"
    . '>' . "\n";
$header .= "<channel>\n";
$header .= "<title>$wxrTitle</title>\n";
$header .= "<link>$wxrLink</link>\n";
$header .= "<description>$wxrDescription</description>\n";
$header .= "<language>FR</language>\n\n";
$header .= "<wp:wxr_version>1.1</wp:wxr_version>\n";
$header .= "<wp:base_site_url>$wxrLink</wp:base_site_url>\n";
$header .= "<wp:base_blog_url>$wxrLink</wp:base_blog_url>\n\n";
$header .= "<wp:author><wp:author_id>1</wp:author_id>"
    . "<wp:author_login>$wxrAuthor</wp:author_login>"
    . "<wp:author_email></wp:author_email>"
    . "<wp:author_display_name>$wxrAuthorCdata</wp:author_display_name>"
    . "<wp:author_first_name><![CDATA[]]></wp:author_first_name>"
    . "<wp:author_last_name><![CDATA[]]></wp:author_last_name>"
    . "</wp:author>\n\n";
$header .= "<wp:category><wp:term_id>1</wp:term_id>"
    . "<wp:category_nicename>Home</wp:category_nicename>"
    . "<wp:category_parent></wp:category_parent>"
    . "<wp:cat_name><![CDATA[Home]]></wp:cat_name>"
    . "</wp:category>\n\n";
$header .= "<generator>RSS TO WXR By Luca Lo Valvo</generator>\n\n";

// $corp is left undefined upstream when the feed has no items; treat that as
// an empty item list.
$items = isset($corp) ? $corp : '';

$footer = "</channel>\n";
$footer .= "</rss>\n";

// Complete WXR document.
$str = $header . $items . $footer;

// Output the converted feed as XML.
if (!headers_sent()) {
    header('Content-Type: text/xml; charset=UTF-8');
}
echo $str;
?>