mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
add match page
This commit is contained in:
parent
caf376ac42
commit
668edc434a
9 changed files with 3614 additions and 7 deletions
11
src/data.rs
11
src/data.rs
|
|
@ -189,3 +189,14 @@ pub struct Season {
|
|||
pub id: String,
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
|
||||
pub struct MatchInfo {
|
||||
pub comment: Option<String>,
|
||||
pub comment_author: Option<String>,
|
||||
pub team_home: TeamRef,
|
||||
pub team_away: TeamRef,
|
||||
pub score_home: u8,
|
||||
pub score_away: u8,
|
||||
}
|
||||
|
|
|
|||
25
src/lib.rs
25
src/lib.rs
|
|
@ -3,10 +3,12 @@ mod error;
|
|||
#[doc(hidden)]
|
||||
pub mod parser;
|
||||
|
||||
use crate::data::{MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason};
|
||||
use crate::data::{
|
||||
MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason,
|
||||
};
|
||||
use crate::parser::{
|
||||
Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser, TeamMatchesParser,
|
||||
TeamParser, TeamRosterHistoryParser,
|
||||
MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
|
||||
TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
|
||||
};
|
||||
pub use error::*;
|
||||
use reqwest::redirect::Policy;
|
||||
|
|
@ -25,6 +27,7 @@ pub struct UgcClient {
|
|||
team_matches_parser: TeamMatchesParser,
|
||||
seasons_parser: SeasonsParser,
|
||||
team_lookup_parser: TeamLookupParser,
|
||||
match_page_parser: MatchPageParser,
|
||||
}
|
||||
|
||||
/// "API client" for ugc by scraping the website
|
||||
|
|
@ -39,6 +42,7 @@ impl UgcClient {
|
|||
team_matches_parser: TeamMatchesParser::new(),
|
||||
seasons_parser: SeasonsParser::new(),
|
||||
team_lookup_parser: TeamLookupParser::new(),
|
||||
match_page_parser: MatchPageParser::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -161,4 +165,19 @@ impl UgcClient {
|
|||
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
|
||||
.await
|
||||
}
|
||||
|
||||
/// Get match page info
|
||||
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
|
||||
let body = self
|
||||
.client
|
||||
.get(&format!(
|
||||
"https://www.ugcleague.com/matchpage_tf2h.cfm?mid={}",
|
||||
id
|
||||
))
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
self.match_page_parser.parse(&body)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
138
src/parser/match_page.rs
Normal file
138
src/parser/match_page.rs
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
use super::Parser;
|
||||
use crate::data::{MatchInfo, TeamRef};
|
||||
use crate::parser::{select_last_text, select_text, team_id_from_link, ElementExt};
|
||||
use crate::{ParseError, Result};
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
/// CSS selector for the element holding the name of the match comment's author.
const SELECTOR_MATCH_COMMENT_AUTHOR: &str = ".row-fluid .col-md-12 span.text-success";

/// CSS selector for the paragraphs of the match comment; the parser uses the last match.
const SELECTOR_MATCH_COMMENT: &str = ".row-fluid .col-md-12 > .white-row-light-small > p";

/// CSS selector for links whose `href` starts with `team_page`, excluding `.btn-large` links.
/// The parser treats the first match as the home team and the second as the away team.
const SELECTOR_MATCH_TEAM_LINK: &str = "a[href^=\"team_page\"]:not(.btn-large)";

/// CSS selector for the first cell of the second row of a result table (team name).
const SELECTOR_MATCH_RESULT_TEAM: &str =
    ".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(1)";

/// CSS selector for the second cell of the second row of a result table (team score).
const SELECTOR_MATCH_RESULT_SCORE: &str =
    ".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(2)";
|
||||
|
||||
pub struct MatchPageParser {
|
||||
selector_author: Selector,
|
||||
selector_comment: Selector,
|
||||
selector_team_link: Selector,
|
||||
selector_result_team: Selector,
|
||||
selector_result_score: Selector,
|
||||
}
|
||||
|
||||
impl Default for MatchPageParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl MatchPageParser {
|
||||
pub fn new() -> Self {
|
||||
MatchPageParser {
|
||||
selector_author: Selector::parse(SELECTOR_MATCH_COMMENT_AUTHOR).unwrap(),
|
||||
selector_comment: Selector::parse(SELECTOR_MATCH_COMMENT).unwrap(),
|
||||
selector_team_link: Selector::parse(SELECTOR_MATCH_TEAM_LINK).unwrap(),
|
||||
selector_result_team: Selector::parse(SELECTOR_MATCH_RESULT_TEAM).unwrap(),
|
||||
selector_result_score: Selector::parse(SELECTOR_MATCH_RESULT_SCORE).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for MatchPageParser {
|
||||
type Output = MatchInfo;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(document);
|
||||
|
||||
let author = select_text(document.root_element(), &self.selector_author);
|
||||
let comment = select_last_text(document.root_element(), &self.selector_comment);
|
||||
|
||||
let mut team_links = document.select(&self.selector_team_link);
|
||||
let team_link_home = team_links.next().ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_TEAM_LINK,
|
||||
role: "home team link",
|
||||
})?;
|
||||
let team_link_away = team_links.next().ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_TEAM_LINK,
|
||||
role: "away team link",
|
||||
})?;
|
||||
let home_team_id = team_id_from_link(team_link_home.attr("href").unwrap_or_default())?;
|
||||
let away_team_id = team_id_from_link(team_link_away.attr("href").unwrap_or_default())?;
|
||||
|
||||
let mut team_names = document.select(&self.selector_result_team);
|
||||
let team_name_home = team_names
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
role: "home team link",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "home team name",
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
})?
|
||||
.to_string();
|
||||
let team_name_away = team_names
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
role: "away team link",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "away team name",
|
||||
selector: SELECTOR_MATCH_RESULT_TEAM,
|
||||
})?
|
||||
.to_string();
|
||||
|
||||
let mut team_scores = document.select(&self.selector_result_score);
|
||||
|
||||
let team_score_home = team_scores
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
role: "home team score",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "home team score",
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
})?
|
||||
.parse()
|
||||
.map_err(|_| ParseError::InvalidText {
|
||||
role: "away team score",
|
||||
text: "dont have this".to_string(),
|
||||
})?;
|
||||
let team_score_away = team_scores
|
||||
.next()
|
||||
.ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
role: "away team link",
|
||||
})?
|
||||
.first_text()
|
||||
.ok_or(ParseError::EmptyText {
|
||||
role: "away team name",
|
||||
selector: SELECTOR_MATCH_RESULT_SCORE,
|
||||
})?
|
||||
.parse()
|
||||
.map_err(|_| ParseError::InvalidText {
|
||||
role: "home team score",
|
||||
text: "dont have this".to_string(),
|
||||
})?;
|
||||
|
||||
Ok(MatchInfo {
|
||||
comment_author: author.map(String::from),
|
||||
comment: comment.map(String::from),
|
||||
score_away: team_score_away,
|
||||
score_home: team_score_home,
|
||||
team_home: TeamRef {
|
||||
name: team_name_home.to_string(),
|
||||
id: home_team_id,
|
||||
},
|
||||
team_away: TeamRef {
|
||||
name: team_name_away.to_string(),
|
||||
id: away_team_id,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ use steamid_ng::SteamID;
|
|||
use time::format_description::FormatItem;
|
||||
use time::macros::format_description;
|
||||
|
||||
mod match_page;
|
||||
mod player;
|
||||
mod player_details;
|
||||
mod seasons;
|
||||
|
|
@ -12,6 +13,7 @@ mod team_lookup;
|
|||
mod team_matches;
|
||||
mod team_roster_history;
|
||||
|
||||
pub use match_page::*;
|
||||
pub use player::*;
|
||||
pub use player_details::*;
|
||||
pub use seasons::*;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue