Sample: importing a page from a frontend website using XPath
// Example call: imports the page at the given URL as a child of the first ID returned by
// PageManager.GetChildrenIDs(Guid.Empty), using the first IPageType found in the data store.
ParsePage(@"http://<pageurl>", PageManager.GetChildrenIDs(Guid.Empty).FirstOrDefault(), DataFacade.GetData<IPageType>().First());
Code
public static string CacheFolder = "Cache";
/// <summary>
/// Returns the markup found at <paramref name="url"/>. The page is downloaded once and
/// stored in a file below <c>PathUtil.BaseDirectory/CacheFolder</c>; later calls for the
/// same URL are served from that file.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>The page content as text.</returns>
/// <remarks>
/// When the current request carries an <c>onlycache</c> value nothing is downloaded, so a
/// URL that has not been cached yet makes <c>File.ReadAllText</c> throw.
/// </remarks>
public string GetContent(string url)
{
    // string.GetHashCode() is not guaranteed to be stable between runtimes/processes and
    // collides easily, which would serve the wrong cached page. Use a content hash of the
    // URL as the file name instead (entries cached under the old naming are re-downloaded).
    string cacheKey;
    using (var sha = System.Security.Cryptography.SHA256.Create())
    {
        var digest = sha.ComputeHash(System.Text.Encoding.UTF8.GetBytes(url));
        cacheKey = BitConverter.ToString(digest).Replace("-", string.Empty);
    }

    var cachePath = Path.Combine(PathUtil.BaseDirectory, CacheFolder, cacheKey);
    if (!File.Exists(cachePath) && Request["onlycache"] == null)
    {
        // WebClient is IDisposable; release it as soon as the download is done.
        using (var client = new WebClient())
        {
            client.Encoding = System.Text.Encoding.UTF8;
            client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");
            var content = client.DownloadString(url);

            Directory.CreateDirectory(Path.GetDirectoryName(cachePath));
            File.WriteAllText(cachePath, content, System.Text.Encoding.UTF8);
            return content;
        }
    }

    return File.ReadAllText(cachePath);
}
/// <summary>
/// Maps an old image path of the form <c>a/b/digits/NNNsuffix.jpg</c> to
/// <c>/Content/a/b/digits-NNN.jpg</c>, with underscores replaced by spaces.
/// Paths that do not match the pattern are returned unchanged.
/// </summary>
/// <param name="oldpath">Image path as it appears in the source markup.</param>
/// <returns>The rewritten path, or <paramref name="oldpath"/> when no rule applies.</returns>
protected string GetNewImagePath(string oldpath)
{
    // Custom, site-specific rule; safe to drop when not needed.
    var parts = Regex.Match(oldpath, @"([^/]+)/([^/]+)/([\d\(\)]+)/(\d+)([^\.]+)\.jpg");
    if (!parts.Success)
    {
        return oldpath;
    }

    var rewritten = string.Format(
        "/Content/{0}/{1}/{2}-{3}.jpg",
        parts.Groups[1].Value,
        parts.Groups[2].Value,
        parts.Groups[3].Value,
        parts.Groups[4].Value);
    return rewritten.Replace("_", " ");
}
// Media files handled so far, keyed by the original src value. ImportMedia stores null
// for a key before it starts downloading and replaces it with the added media file afterwards.
private Dictionary<string, IMediaFile> ImportedImages = new Dictionary<string, IMediaFile>();
/// <summary>
/// Makes sure a media folder with the given path exists, recursing into the parent path
/// (via <c>GetParentPath</c>) first so that ancestors are handled before the folder itself.
/// </summary>
/// <param name="path">Folder path using "/" separators.</param>
/// <returns>
/// The existing or newly added folder; <c>null</c> for a null/blank path; for "/" whatever
/// the data store returns for that path (possibly <c>null</c>).
/// </returns>
private IMediaFileFolder ForceMediaFolder(string path)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return null;
    }

    if (path == "/") //?
    {
        var rootMatches = DataFacade.GetData<IMediaFileFolder>(candidate => candidate.Path == path);
        return rootMatches.FirstOrDefault();
    }

    // Ancestors first, so the chain of folders is complete before this one is added.
    ForceMediaFolder(GetParentPath(path));

    var existing = DataFacade.GetData<IMediaFileFolder>().FirstOrDefault(candidate => candidate.Path == path);
    if (existing != null)
    {
        return existing;
    }

    var created = DataFacade.BuildNew<IMediaFileFolder>();
    created.Title = Path.GetFileName(path);
    created.Path = path;
    return DataFacade.AddNew<IMediaFileFolder>(created);
}
// Returns the part of the path before its last "/". When there is no "/" or the only
// one is the leading character, the result is the empty string.
private Func<string, string> GetParentPath = (path) =>
{
    var lastSlash = path.LastIndexOf("/");
    return path.Substring(0, Math.Max(lastSlash, 0));
};
/// <summary>
/// Removes every character listed by <c>Path.GetInvalidFileNameChars()</c> from the name.
/// </summary>
/// <param name="fileName">File name to sanitize.</param>
/// <returns>The name without invalid file-name characters.</returns>
private static string CleanFileName(string fileName)
{
    var cleaned = fileName;
    foreach (var invalidChar in Path.GetInvalidFileNameChars())
    {
        cleaned = cleaned.Replace(invalidChar.ToString(), string.Empty);
    }

    return cleaned;
}
/// <summary>
/// Tells whether the data store already holds a media file with the same folder path
/// and file name as <paramref name="file"/>.
/// </summary>
/// <param name="file">Media file whose FolderPath/FileName pair is looked up.</param>
/// <returns><c>true</c> when a matching media file is found.</returns>
private static bool Exists(IMediaFile file)
{
    var sameLocation =
        from candidate in DataFacade.GetData<IMediaFile>()
        where candidate.FolderPath == file.FolderPath && candidate.FileName == file.FileName
        select candidate;
    return sameLocation.Any();
}
/// <summary>
/// Downloads the resource referenced by <paramref name="src"/> (resolved against
/// <paramref name="href"/>) and adds it to the media archive. Each distinct
/// <paramref name="src"/> is imported only once; repeated calls return the recorded result.
/// </summary>
/// <param name="src">Resource reference as found in the page markup.</param>
/// <param name="href">Address of the page containing the reference; used as the base URI.</param>
/// <returns>
/// The added media file, or <c>null</c> when an earlier import of the same
/// <paramref name="src"/> was started but never completed.
/// </returns>
private IMediaFile ImportMedia(string src, string href)
{
    // The lookup must use the src variable; the previous literal "src" key never matched,
    // so every reference was downloaded and imported again.
    IMediaFile alreadyImported;
    if (ImportedImages.TryGetValue(src, out alreadyImported))
    {
        return alreadyImported;
    }

    var url = new Uri(new Uri(href), src).OriginalString;

    // Mark as attempted up front: if the import fails, later references yield null.
    ImportedImages[src] = null;

    WorkflowMediaFile mediaFile;
    using (var client = new WebClient())
    {
        client.Encoding = System.Text.Encoding.UTF8;
        client.Headers.Add("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705;)");

        // Both the client and the response stream are disposed even when a later step throws.
        using (var readStream = client.OpenRead(url))
        {
            var folder = Path.GetDirectoryName(src).Replace(@"\", @"/");
            ForceMediaFolder(folder);

            var filename = CleanFileName(Path.GetFileName(src));

            mediaFile = new WorkflowMediaFile();
            mediaFile.FileName = Path.GetFileName(filename);
            mediaFile.FolderPath = folder;
            mediaFile.Title = Path.GetFileNameWithoutExtension(src);
            mediaFile.Description = string.Empty;
            mediaFile.Culture = UserSettings.ActiveLocaleCultureInfo.Name;
            mediaFile.MimeType = MimeTypeInfo.GetCanonicalFromExtension(Path.GetExtension(filename));
            if (mediaFile.MimeType == MimeTypeInfo.Default)
            {
                // Extension gave no answer; fall back to what the server reported.
                mediaFile.MimeType = MimeTypeInfo.GetCanonical(client.ResponseHeaders["content-type"]);
            }

            using (Stream writeStream = mediaFile.GetNewWriteStream())
            {
                readStream.CopyTo(writeStream);
            }
        }
    }

    // Append a running number until the folder/file name pair is free.
    var counter = 0;
    var extension = Path.GetExtension(mediaFile.FileName);
    var name = mediaFile.FileName.GetNameWithoutExtension();
    while (Exists(mediaFile))
    {
        counter++;
        mediaFile.FileName = name + counter.ToString() + extension;
    }

    var addedFile = DataFacade.AddNew<IMediaFile>(mediaFile);
    ImportedImages[src] = addedFile;
    return addedFile;
}
/// <summary>
/// Builds the function-call element that stands in for imported call-to-action blocks.
/// </summary>
/// <returns>A new <c>f:function</c> element named "Widgets.Function".</returns>
protected XElement GetFunction1()
{
    const string functionMarkup =
        @"<f:function name=""Widgets.Function"" xmlns:f=""http://www.composite.net/ns/function/1.0"" />";
    var functionElement = XElement.Parse(functionMarkup);
    return functionElement;
}
/// <summary>
/// Wraps the given nodes in an XHTML document skeleton (html / empty head / body) and
/// returns the serialized markup.
/// </summary>
/// <param name="nodes">Nodes to place inside the body element.</param>
/// <returns>The complete XHTML document as a string.</returns>
protected string GetXhtmlContent(IEnumerable<XNode> nodes)
{
    var skeletonMarkup = string.Format("<html xmlns='{0}'><head /><body>{1}</body></html>", Namespaces.Xhtml, string.Empty);
    var document = XElement.Parse(skeletonMarkup);

    // The body is the last child element of the skeleton.
    var body = document.Elements().Last();
    body.Add(nodes);

    return document.ToString();
}
/// <summary>
/// Passes the trimmed title through <c>UrlFormattersPluginFacade.FormatUrl</c>.
/// </summary>
/// <param name="title">Page title to convert.</param>
/// <returns>The value returned by the URL formatter.</returns>
private string GenerateUrlTitleFromTitle(string title)
{
    var trimmedTitle = title.Trim();
    return UrlFormattersPluginFacade.FormatUrl(trimmedTitle, false);
}
/// <summary>
/// Checks that no child page of <paramref name="parentId"/> already uses the URL title of
/// <paramref name="newPage"/>. The comparison ignores case (invariant culture).
/// </summary>
/// <param name="newPage">Page whose <c>UrlTitle</c> is being checked.</param>
/// <param name="parentId">Id of the parent whose children are inspected.</param>
/// <returns><c>true</c> when the URL title is not taken by any sibling.</returns>
private bool UrlTitleIsUniqueAmongSiblings(IPage newPage, Guid parentId)
{
    var takenUrlTitles = PageServices.GetChildren(parentId)
        .Select(sibling => sibling.UrlTitle)
        .ToList();

    foreach (var takenUrlTitle in takenUrlTitles)
    {
        if (takenUrlTitle.Equals(newPage.UrlTitle, StringComparison.InvariantCultureIgnoreCase))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Imports one frontend page: fetches its markup, extracts the element with id
/// <c>mainContent</c>, imports referenced images and uploaded files into the media archive,
/// and adds a new page (title, description, URL title, single "content" placeholder) at the
/// bottom of <paramref name="parentId"/>'s children.
/// </summary>
/// <param name="href">Address of the page to import.</param>
/// <param name="parentId">Id of the page that becomes the parent of the new page.</param>
/// <param name="pageType">Page type assigned to the new page; also decides the template.</param>
/// <exception cref="InvalidOperationException">
/// The fetched page has no element with id <c>mainContent</c>, or that element has no h1.
/// </exception>
protected void ParsePage(string href, Guid parentId, IPageType pageType)
{
    using (new DataScope(PublicationScope.Unpublished))
    {
        var pageContent = GetContent(href);

        // Strip scripts, comments, the DOCTYPE and Ektron design-time elements before tidying.
        pageContent = Regex.Replace(pageContent, @"(?s)<script.*?(/>|</script>)", "", RegexOptions.Multiline);
        pageContent = Regex.Replace(pageContent, @"(?s)<!--.*?-->", "", RegexOptions.Multiline);
        pageContent = Regex.Replace(pageContent, @"<!DOCTYPE[^>[]*(\[[^]]*\])?>", "", RegexOptions.Multiline);
        pageContent = Regex.Replace(pageContent, @"(?s)<ektdesignns_choices.*?(/>|</ektdesignns_choices>)", "", RegexOptions.Multiline);

        var xpage = MarkupTransformationServices.TidyHtml(pageContent.Trim()).Output;
        var xmlnsManager = new XmlNamespaceManager(new NameTable());
        xmlnsManager.AddNamespace("x", Namespaces.Xhtml.ToString());

        var xarticle = xpage.XPathSelectElement("//*[@id='mainContent']");
        if (xarticle == null)
        {
            throw new InvalidOperationException(
                string.Format("No element with id 'mainContent' was found in '{0}'.", href));
        }

        // Remove stuff that should not be imported.
        xarticle.XPathSelectElements("//*[@class='social']").Remove();

        // Alternative source: //x:meta[@name='description'] (attribute "content").
        // Keywords could be read the same way from //x:meta[@name='keywords'].
        var xdescription = xpage.XPathSelectElement("//x:input[@name='EktFormDescription']", xmlnsManager);
        var newDescription = xdescription == null
            ? string.Empty
            : xdescription.GetAttributeValue("value");

        // Replace embedded widgets with a function call. The matches are materialized first:
        // replacing nodes while the lazy XPath query is still being enumerated can cut the
        // enumeration short.
        foreach (var function in xarticle.XPathSelectElements("//*[@class='hs-cta-wrapper']").ToList())
        {
            function.ReplaceWith(GetFunction1());
        }

        // Which element holds the title depends on the source HTML; adjust the XPath as needed.
        var xtitle = xarticle.XPathSelectElement("descendant::x:h1", xmlnsManager);
        if (xtitle == null)
        {
            throw new InvalidOperationException(
                string.Format("No h1 title element was found inside 'mainContent' of '{0}'.", href));
        }

        // Import images and point the markup at the imported media files.
        foreach (var img in xarticle.XPathSelectElements("descendant::x:img", xmlnsManager).ToList())
        {
            var src = img.GetAttributeValue("src");
            if (string.IsNullOrEmpty(src))
            {
                continue; // <img> without a usable src: nothing to import
            }

            var contentImage = ImportMedia(src, href);
            if (contentImage != null)
            {
                img.SetAttributeValue("src", MediaUrls.BuildUrl(contentImage, UrlKind.Internal));
            }
        }

        // Import files that links point to; '/uploadedImages' is the default folder in Ektron.
        foreach (var anchor in xarticle.XPathSelectElements("descendant::x:a", xmlnsManager).ToList())
        {
            var ahref = anchor.GetAttributeValue("href");
            if (string.IsNullOrEmpty(ahref) || !ahref.StartsWith("/uploadedImages", StringComparison.Ordinal))
            {
                continue; // named anchors and ordinary links are left untouched
            }

            var linkedMedia = ImportMedia(ahref, href);
            if (linkedMedia != null)
            {
                anchor.SetAttributeValue("href", MediaUrls.BuildUrl(linkedMedia, UrlKind.Internal));
            }
        }

        // The heading becomes the page title and is taken out of the body content.
        var newTitle = xtitle.Value.Trim();
        xtitle.Remove();
        var newContent = GetXhtmlContent(xarticle.Nodes());

        var newPage = DataFacade.BuildNew<IPage>();
        newPage.Id = Guid.NewGuid();
        newPage.SourceCultureName = UserSettings.ActiveLocaleCultureInfo.Name;
        newPage.PageTypeId = pageType.Id;

        // Template: the page type's default, otherwise the first template it is restricted to.
        var templateRestrictions =
            DataFacade.GetData<IPageTypePageTemplateRestriction>().
                Where(f => f.PageTypeId == newPage.PageTypeId);
        if (pageType.DefaultTemplateId != Guid.Empty)
        {
            newPage.TemplateId = pageType.DefaultTemplateId;
        }
        else if (templateRestrictions.Any())
        {
            newPage.TemplateId = templateRestrictions.First().PageTemplateId;
        }

        newPage.Description = newDescription;
        newPage.Title = newTitle;
        newPage.MenuTitle = newTitle;
        newPage.FriendlyUrl = "";

        // URL title: last segment of the source address without extension, made unique
        // among the siblings by appending a running number.
        var newUrlTitle = Path.GetFileNameWithoutExtension(href.Last() == '/' ? href.Substring(0, href.Length - 1) : href);
        newPage.UrlTitle = newUrlTitle;
        var count = 1;
        while (!UrlTitleIsUniqueAmongSiblings(newPage, parentId))
        {
            newPage.UrlTitle = newUrlTitle + count++;
        }

        newPage.PublicationStatus = GenericPublishProcessController.Draft;
        newPage = newPage.AddPageAtBottom(parentId, true);

        // One placeholder holds the whole imported body.
        var pagePlaceholderContent = DataFacade.BuildNew<IPagePlaceholderContent>();
        pagePlaceholderContent.PageId = newPage.Id;
        pagePlaceholderContent.PlaceHolderId = "content";
        pagePlaceholderContent.Content = newContent;
        DataFacade.AddNew<IPagePlaceholderContent>(pagePlaceholderContent);

        newPage.PublicationStatus = GenericPublishProcessController.Published;
        DataFacade.Update(newPage);
    }
}