Skip to content

Commit

Permalink
add XRobotsTag, initial implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
hafihaf123 committed Jan 12, 2025
1 parent 33fdb11 commit f3c6d97
Show file tree
Hide file tree
Showing 6 changed files with 254 additions and 0 deletions.
3 changes: 3 additions & 0 deletions rama-http-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ pub mod header {
"x-real-ip",
];

// non-std web-crawler info headers
static_header!["x-robots-tag",];

/// Static Header Value that is can be used as `User-Agent` or `Server` header.
pub static RAMA_ID_HEADER_VALUE: HeaderValue = HeaderValue::from_static(
const_format::formatcp!("{}/{}", rama_utils::info::NAME, rama_utils::info::VERSION),
Expand Down
4 changes: 4 additions & 0 deletions rama-http/src/headers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,8 @@ pub mod authorization {
pub use ::rama_http_types::headers::HeaderExt;

pub(crate) mod util;

mod x_robots_tag;
pub use x_robots_tag::XRobotsTag;

pub use util::quality_value::{Quality, QualityValue};
46 changes: 46 additions & 0 deletions rama-http/src/headers/x_robots_tag/element.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use crate::headers::x_robots_tag::rule::Rule;
use rama_core::error::OpaqueError;
use std::fmt::Formatter;
use std::str::FromStr;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Element {
bot_name: Option<String>, // or `rama_ua::UserAgent` ???
indexing_rule: Rule,
}

impl std::fmt::Display for Element {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match &self.bot_name {
None => write!(f, "{}", self.indexing_rule),
Some(bot) => write!(f, "{}: {}", bot, self.indexing_rule),
}
}
}

impl FromStr for Element {
type Err = OpaqueError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let (bot_name, indexing_rule) = match Rule::from_str(s) {
Ok(rule) => (None, Ok(rule)),
Err(e) => match *s.split(":").map(str::trim).collect::<Vec<_>>().as_slice() {
[bot_name, rule] => (Some(bot_name.to_owned()), rule.parse()),
[bot_name, rule_name, rule_value] => (
Some(bot_name.to_owned()),
[rule_name, rule_value][..].try_into(),
),
_ => (None, Err(e)),
},
};
match indexing_rule {
Ok(indexing_rule) => Ok(Element {
bot_name,
indexing_rule,
}),
Err(_) => Err(OpaqueError::from_display(
"Failed to parse XRobotsTagElement",
)),
}
}
}
19 changes: 19 additions & 0 deletions rama-http/src/headers/x_robots_tag/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use crate::headers::x_robots_tag::Element;

#[derive(Debug, Clone)]
/// An iterator over the `XRobotsTag` header's elements.
pub struct Iterator(std::vec::IntoIter<Element>);

impl core::iter::Iterator for Iterator {
type Item = Element;

fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}

impl Iterator {
pub fn new(elements: std::vec::IntoIter<Element>) -> Self {
Self(elements)
}
}
69 changes: 69 additions & 0 deletions rama-http/src/headers/x_robots_tag/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
mod rule;

mod element;

mod iterator;

// ----------------------------------------------- \\

use crate::headers::Header;
use element::Element;
use http::{HeaderName, HeaderValue};
use iterator::Iterator as XRobotsTagIterator;
use std::fmt::Formatter;
use std::iter::Iterator;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XRobotsTag(Vec<Element>);

impl Header for XRobotsTag {
fn name() -> &'static HeaderName {
&crate::header::X_ROBOTS_TAG
}

fn decode<'i, I>(values: &mut I) -> Result<Self, headers::Error>
where
Self: Sized,
I: Iterator<Item = &'i HeaderValue>,
{
crate::headers::util::csv::from_comma_delimited(values).map(XRobotsTag)
}

fn encode<E: Extend<HeaderValue>>(&self, values: &mut E) {
use std::fmt;
struct Format<F>(F);
impl<F> fmt::Display for Format<F>
where
F: Fn(&mut Formatter<'_>) -> fmt::Result,
{
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
self.0(f)
}
}
let s = format!(
"{}",
Format(|f: &mut Formatter<'_>| {
crate::headers::util::csv::fmt_comma_delimited(&mut *f, self.0.iter())
})
);
values.extend(Some(HeaderValue::from_str(&s).unwrap()))
}
}

impl FromIterator<Element> for XRobotsTag {
fn from_iter<T>(iter: T) -> Self
where
T: IntoIterator<Item = Element>,
{
XRobotsTag(iter.into_iter().collect())
}
}

impl IntoIterator for XRobotsTag {
type Item = Element;
type IntoIter = XRobotsTagIterator;

fn into_iter(self) -> Self::IntoIter {
XRobotsTagIterator::new(self.0.into_iter())
}
}
113 changes: 113 additions & 0 deletions rama-http/src/headers/x_robots_tag/rule.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use rama_core::error::OpaqueError;
use std::convert::{TryFrom, TryInto};
use std::fmt::Formatter;
use std::str::FromStr;

#[derive(Clone, Debug, Eq, PartialEq)]
pub(super) enum Rule {
All,
NoIndex,
NoFollow,
None,
NoSnippet,
IndexIfEmbedded,
MaxSnippet(u32),
MaxImagePreview(MaxImagePreviewSetting),
MaxVideoPreview(Option<u32>),
NoTranslate,
NoImageIndex,
UnavailableAfter(String), // "A date must be specified in a format such as RFC 822, RFC 850, or ISO 8601."
}

impl std::fmt::Display for Rule {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Rule::All => write!(f, "all"),
Rule::NoIndex => write!(f, "noindex"),
Rule::NoFollow => write!(f, "nofollow"),
Rule::None => write!(f, "none"),
Rule::NoSnippet => write!(f, "nosnippet"),
Rule::IndexIfEmbedded => write!(f, "indexifembedded"),
Rule::MaxSnippet(number) => write!(f, "maxsnippet: {}", number),
Rule::MaxImagePreview(setting) => write!(f, "max-image-preview: {}", setting),
Rule::MaxVideoPreview(number) => match number {
Some(number) => write!(f, "max-video-preview: {}", number),
None => write!(f, "max-video-preview: -1"),
},
Rule::NoTranslate => write!(f, "notranslate"),
Rule::NoImageIndex => write!(f, "noimageindex"),
Rule::UnavailableAfter(date) => write!(f, "unavailable_after: {}", date),
}
}
}

impl FromStr for Rule {
type Err = OpaqueError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
s.split(":")
.map(str::trim)
.collect::<Vec<_>>()
.as_slice()
.try_into()
}
}

impl TryFrom<&[&str]> for Rule {
type Error = OpaqueError;

fn try_from(value: &[&str]) -> Result<Self, Self::Error> {
match *value {
["all"] => Ok(Rule::All),
["no_index"] => Ok(Rule::NoIndex),
["no_follow"] => Ok(Rule::NoFollow),
["none"] => Ok(Rule::None),
["no_snippet"] => Ok(Rule::NoSnippet),
["indexifembedded"] => Ok(Rule::IndexIfEmbedded),
["max-snippet", number] => Ok(Rule::MaxSnippet(
number.parse().map_err(OpaqueError::from_display)?,
)),
["max-image-preview", setting] => Ok(Rule::MaxImagePreview(setting.parse()?)),
["max-video-preview", number] => Ok(Rule::MaxVideoPreview(match number {
"-1" => None,
n => Some(n.parse().map_err(OpaqueError::from_display)?),
})),
["notranslate"] => Ok(Rule::NoTranslate),
["noimageindex"] => Ok(Rule::NoImageIndex),
["unavailable_after", date] => Ok(Rule::UnavailableAfter(date.to_owned())),
_ => Err(OpaqueError::from_display("Invalid X-Robots-Tag rule")),
}
}
}

#[derive(Clone, Debug, Eq, PartialEq)]
enum MaxImagePreviewSetting {
None,
Standard,
Large,
}

impl std::fmt::Display for MaxImagePreviewSetting {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
MaxImagePreviewSetting::None => write!(f, "none"),
MaxImagePreviewSetting::Standard => write!(f, "standard"),
MaxImagePreviewSetting::Large => write!(f, "large"),
}
}
}

impl FromStr for MaxImagePreviewSetting {
type Err = OpaqueError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"none" => Ok(MaxImagePreviewSetting::None),
"standard" => Ok(MaxImagePreviewSetting::Standard),
"large" => Ok(MaxImagePreviewSetting::Large),
_ => Err(OpaqueError::from_display(
"failed to parse MaxImagePreviewSetting",
)),
}
}
}

0 comments on commit f3c6d97

Please sign in to comment.