mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
init
This commit is contained in:
commit
0c58410f6a
11 changed files with 1902 additions and 0 deletions
1
.envrc
Normal file
1
.envrc
Normal file
|
|
@ -0,0 +1 @@
|
|||
use flake
|
||||
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
/target
|
||||
result*
|
||||
.direnv
|
||||
1603
Cargo.lock
generated
Normal file
1603
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
13
Cargo.toml
Normal file
13
Cargo.toml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "ugc-scraper"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
rust-version = "1.67"
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
|
||||
reqwest = "0.11.22"
|
||||
scraper = "0.18.1"
|
||||
miette = "5.10.0"
|
||||
thiserror = "1.0.50"
|
||||
main_error = "0.1.2"
|
||||
106
flake.lock
generated
Normal file
106
flake.lock
generated
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
{
|
||||
"nodes": {
|
||||
"naersk": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1686242667,
|
||||
"narHash": "sha256-I7Kwp06WX/9E+rEND1i1wjdKQQm3XiDxYOyNK9fuJu0=",
|
||||
"owner": "icewind1991",
|
||||
"repo": "naersk",
|
||||
"rev": "6d245a3bbb2ee31ec726bb57b9a8b206302e7110",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "icewind1991",
|
||||
"repo": "naersk",
|
||||
"rev": "6d245a3bbb2ee31ec726bb57b9a8b206302e7110",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1699994397,
|
||||
"narHash": "sha256-xxNeIcMNMXH2EA9IAX6Cny+50mvY22LhIBiGZV363gc=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "d4b5a67bbe9ef750bd2fdffd4cad400dd5553af8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"id": "nixpkgs",
|
||||
"ref": "nixos-23.05",
|
||||
"type": "indirect"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"naersk": "naersk",
|
||||
"nixpkgs": "nixpkgs",
|
||||
"rust-overlay": "rust-overlay",
|
||||
"utils": "utils"
|
||||
}
|
||||
},
|
||||
"rust-overlay": {
|
||||
"inputs": {
|
||||
"flake-utils": [
|
||||
"utils"
|
||||
],
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1700014539,
|
||||
"narHash": "sha256-YF+AoCicGzsrlNDrXZYCymfVyYwrMBox+6vbLBsrEQM=",
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"rev": "ed9fd98b28da90b1b28340f3230d35b2061b9752",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"utils": {
|
||||
"inputs": {
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1694529238,
|
||||
"narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
89
flake.nix
Normal file
89
flake.nix
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
# Nix flake for the ugc-scraper crate: check/clippy/test/msrv packages built
# with naersk, plus development shells for the latest stable and MSRV toolchains.
{
  inputs = {
    nixpkgs.url = "nixpkgs/nixos-23.05";
    utils.url = "github:numtide/flake-utils";
    naersk.url = "github:icewind1991/naersk?rev=6d245a3bbb2ee31ec726bb57b9a8b206302e7110";
    naersk.inputs.nixpkgs.follows = "nixpkgs";
    rust-overlay.url = "github:oxalica/rust-overlay";
    rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
    rust-overlay.inputs.flake-utils.follows = "utils";
  };

  outputs = {
    self,
    nixpkgs,
    utils,
    naersk,
    rust-overlay,
  }:
    utils.lib.eachDefaultSystem (system: let
      overlays = [(import rust-overlay)];
      pkgs = (import nixpkgs) {
        inherit system overlays;
      };
      inherit (pkgs) lib callPackage rust-bin mkShell;
      inherit (lib.sources) sourceByRegex;
      inherit (builtins) fromTOML readFile;

      # The minimum supported Rust version is read from Cargo.toml so it is
      # declared in exactly one place.
      msrv = (fromTOML (readFile ./Cargo.toml)).package.rust-version;
      toolchain = rust-bin.stable.latest.default;
      msrvToolchain = rust-bin.stable."${msrv}".default;

      naersk' = callPackage naersk {
        rustc = toolchain;
        cargo = toolchain;
      };
      msrvNaersk = callPackage naersk {
        rustc = msrvToolchain;
        cargo = msrvToolchain;
      };

      # Only the Cargo manifests and the Rust source directories are part of
      # the build input.
      src = sourceByRegex ./. ["Cargo.*" "(src|derive|benches|tests|examples)(/.*)?"];
      # Options shared by every naersk build below; pname matches the package
      # name declared in Cargo.toml.
      naerskOpt = {
        pname = "ugc-scraper";
        root = src;
      };
    in rec {
      packages = {
        check = naersk'.buildPackage (naerskOpt
          // {
            mode = "check";
          });
        clippy = naersk'.buildPackage (naerskOpt
          // {
            mode = "clippy";
          });
        test = naersk'.buildPackage (naerskOpt
          // {
            release = false;
            mode = "test";
          });
        # Same check, but with the MSRV toolchain.
        msrv = msrvNaersk.buildPackage (naerskOpt
          // {
            mode = "check";
          });
      };

      devShells = let
        tools = with pkgs; [
          bacon
          cargo-edit
          cargo-outdated
          cargo-audit
          cargo-msrv
          cargo-semver-checks
        ];
        deps = with pkgs; [
          pkg-config
          openssl
        ];
      in {
        default = mkShell {
          nativeBuildInputs = [toolchain] ++ tools ++ deps;
        };
        msrv = mkShell {
          nativeBuildInputs = [msrvToolchain] ++ tools ++ deps;
        };
      };
    });
}
|
||||
4
src/data.rs
Normal file
4
src/data.rs
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
/// Data extracted for a single player.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Player {
    /// The player's name.
    pub name: String,
}
|
||||
20
src/error.rs
Normal file
20
src/error.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
use thiserror::Error;
|
||||
use miette::Diagnostic;
|
||||
|
||||
#[derive(Debug, Error, Diagnostic)]
|
||||
pub enum ScrapeError {
|
||||
#[error("Failed to request data: {0:#}")]
|
||||
Request(#[from] reqwest::Error),
|
||||
#[error(transparent)]
|
||||
#[diagnostic(transparent)]
|
||||
Parse(#[from] ParseError)
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, Diagnostic)]
|
||||
pub enum ParseError {
|
||||
#[error("Couldn't find expected element '{selector}' for {role}")]
|
||||
ElementNotFound {
|
||||
selector: &'static str,
|
||||
role: &'static str
|
||||
}
|
||||
}
|
||||
18
src/main.rs
Normal file
18
src/main.rs
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
mod error;
|
||||
mod parser;
|
||||
pub mod data;
|
||||
|
||||
use main_error::MainResult;
|
||||
use reqwest::get;
|
||||
pub use error::*;
|
||||
use crate::parser::{PlayerParser, Parser};
|
||||
|
||||
/// Result alias whose error type defaults to [`ScrapeError`].
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> MainResult {
|
||||
let body = get("https://www.ugcleague.com/players_page.cfm?player_id=76561198024494988").await?.text().await?;
|
||||
let parser = PlayerParser::new();
|
||||
dbg!(parser.parse(&body)?);
|
||||
Ok(())
|
||||
}
|
||||
10
src/parser/mod.rs
Normal file
10
src/parser/mod.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
use crate::Result;
|
||||
|
||||
mod player;
|
||||
|
||||
pub use player::*;
|
||||
|
||||
pub trait Parser {
|
||||
type Output;
|
||||
fn parse(&self, document: &str) -> Result<Self::Output>;
|
||||
}
|
||||
35
src/parser/player.rs
Normal file
35
src/parser/player.rs
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
use scraper::{Html, Selector};
|
||||
use super::Parser;
|
||||
use crate::{ParseError, Result};
|
||||
use crate::data::Player;
|
||||
|
||||
const SELECTOR_PLAYER_NAME: &str = ".col-md-4 > h3 > b";
|
||||
|
||||
|
||||
pub struct PlayerParser {
|
||||
selector_name: Selector,
|
||||
}
|
||||
|
||||
impl PlayerParser {
|
||||
pub fn new() -> Self {
|
||||
PlayerParser {
|
||||
selector_name: Selector::parse(SELECTOR_PLAYER_NAME).unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser for PlayerParser {
|
||||
type Output = Player;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let document = Html::parse_document(&document);
|
||||
|
||||
let name = document.select(&self.selector_name).next().ok_or(ParseError::ElementNotFound {
|
||||
selector: SELECTOR_PLAYER_NAME,
|
||||
role: "player name",
|
||||
})?.text().next().unwrap_or_default().to_string();
|
||||
Ok(Player {
|
||||
name
|
||||
})
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue