Added Magnet Parsing Code
The ode basically uses a ton of regex checks to find specific parts of the magnet url, and then chops it up into the Magnet struct.
This commit is contained in:
parent
ab009683c2
commit
0b632edb9a
|
@ -8,3 +8,11 @@ Cargo.lock
|
||||||
|
|
||||||
# These are backup files generated by rustfmt
|
# These are backup files generated by rustfmt
|
||||||
**/*.rs.bk
|
**/*.rs.bk
|
||||||
|
|
||||||
|
|
||||||
|
# Added by cargo
|
||||||
|
#
|
||||||
|
# already existing elements were commented out
|
||||||
|
|
||||||
|
/target
|
||||||
|
#Cargo.lock
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
[package]
|
||||||
|
name = "magnet-url-rs"
|
||||||
|
version = "1.0.0"
|
||||||
|
authors = ["William Batista <bootlegbilly@protonmail.ch>"]
|
||||||
|
license-file = "LICENSE.md"
|
||||||
|
description = "A simple, hackable magnet url parser in Rust"
|
||||||
|
homepage = "https://github.com/billyb2/magnet-url-rs"
|
||||||
|
repository = "https://github.com/billyb2/magnet-url-rs"
|
||||||
|
readme = "README.md"
|
||||||
|
categories = ["parsing", "encoding", "parser-implementations"]
|
||||||
|
keywords = ["torrent", "magnet", "magnet_url", "parse"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
lto = "fat"
|
||||||
|
opt-level = 3
|
||||||
|
codegen-units = 1
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
lazy_static = "1"
|
||||||
|
regex = "1"
|
21
LICENSE
21
LICENSE
|
@ -1,21 +0,0 @@
|
||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2021 William Batista
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
60
README.md
60
README.md
|
@ -1 +1,59 @@
|
||||||
# parse-magnet-rs
|
# The Rust Magnet URL Parser!
|
||||||
|
# Intro
|
||||||
|
parse-magnet-rs has the goal of, as you may have guessed, parsing the parts of magnets. It does
|
||||||
|
this using some relatively simple regexes. The crate is designed to be very simple and efficient,
|
||||||
|
with a lot of flexibility. It's also designed to be relatively easy to handle errors, and
|
||||||
|
modification of it's source is greatly encouraged through documentation and it's license.
|
||||||
|
|
||||||
|
## How to use this crate
|
||||||
|
Parsing a magnet is very simple:
|
||||||
|
|
||||||
|
```
|
||||||
|
use magnet-url-rs:Magnet;
|
||||||
|
let magnet_url = Magnet::new("magnet:?xt=urn:btih:08ada5a7a6183aae1e09d831df6748d566095a10&dn=Sintel&tr=udp%3A%2F%2Fexplodie.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.empire-js.us%3A1337&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337&tr=wss%3A%2F%2Ftracker.btorrent.xyz&tr=wss%3A%2F%2Ftracker.fastcast.nz&tr=wss%3A%2F%2Ftracker.openwebtorrent.com&ws=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2F&xs=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2Fsintel.torrent");
|
||||||
|
```
|
||||||
|
|
||||||
|
This returns the Magnet struct, which is made up of the fields listed below this section. To
|
||||||
|
access one of these fields is also very simple:
|
||||||
|
|
||||||
|
```
|
||||||
|
use magnet-url-rs:Magnet;
|
||||||
|
let magnet_url = Magnet::new("magnet:?xt=urn:btih:08ada5a7a6183aae1e09d831df6748d566095a10&dn=Sintel&tr=udp%3A%2F%2Fexplodie.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.empire-js.us%3A1337&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337&tr=wss%3A%2F%2Ftracker.btorrent.xyz&tr=wss%3A%2F%2Ftracker.fastcast.nz&tr=wss%3A%2F%2Ftracker.openwebtorrent.com&ws=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2F&xs=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2Fsintel.torrent");
|
||||||
|
println!("{:?}", magnet_url.dn);
|
||||||
|
```
|
||||||
|
|
||||||
|
If you'd like to modify parts of the magnet_url to customize it, that can be done as well!
|
||||||
|
|
||||||
|
```
|
||||||
|
use magnet-url-rs:Magnet;
|
||||||
|
let mut magnet_url = Magnet::new("magnet:?xt=urn:btih:08ada5a7a6183aae1e09d831df6748d566095a10&dn=Sintel&tr=udp%3A%2F%2Fexplodie.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.empire-js.us%3A1337&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337&tr=wss%3A%2F%2Ftracker.btorrent.xyz&tr=wss%3A%2F%2Ftracker.fastcast.nz&tr=wss%3A%2F%2Ftracker.openwebtorrent.com&ws=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2F&xs=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2Fsintel.torrent");
|
||||||
|
println!("{:?}", magnet_url.dn);
|
||||||
|
magnet_url.dn = String::from("hello_world");
|
||||||
|
println!("{:?}", magnet_url.dn);
|
||||||
|
```
|
||||||
|
|
||||||
|
In fact, you can construct your own magnet url as well, as long as you fill in all the
|
||||||
|
parameters!
|
||||||
|
|
||||||
|
```
|
||||||
|
use magnet-url-rs:Magnet;
|
||||||
|
let magnet_url =
|
||||||
|
//Note, this magnet won't actually download, sorry :/
|
||||||
|
Magnet {
|
||||||
|
dn: "hello_world".to_string(),
|
||||||
|
hash_type: "sha1".to_string(),
|
||||||
|
xt: "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed".to_string(),
|
||||||
|
xl: 1234567890,
|
||||||
|
tr:
|
||||||
|
{
|
||||||
|
let mut tr_vec = Vec::new();
|
||||||
|
tr_vec.push("https://example.com/".to_string())
|
||||||
|
tr_vec
|
||||||
|
},
|
||||||
|
kt: "cool+stuff".to_string(),
|
||||||
|
ws: String::new(),
|
||||||
|
acceptable_source: String::new(),
|
||||||
|
mt: String::new(),
|
||||||
|
|
||||||
|
};
|
||||||
|
```
|
|
@ -0,0 +1,164 @@
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
#[macro_use]
|
||||||
|
extern crate lazy_static;
|
||||||
|
|
||||||
|
///The regexes used to identify specific parts of the magnet
|
||||||
|
const MAGNET_SPEC_RE_STR: &str = r"magnet:\?";
|
||||||
|
const DISPLAY_NAME_RE_STR: &str = r"dn=([A-Za-z0-9!@#$%^:*<>,?/()_+=.{}\{}\-]*)(&|$|\s)";
|
||||||
|
const EXACT_TOPIC_RE_STR: &str = r"xt=urn:(sha1|btih|ed2k|aich|kzhash|md5|tree:tiger):([A-Fa-f0-9]+|[A-Za-z2-7]+)";
|
||||||
|
const ADDRESS_TRACKER_RE_STR: &str = r"tr=([A-Za-z0-9!@#$%^:*<>,?/()_+=.{}\{}\-]*)(&|$|\s)";
|
||||||
|
const KEYWORD_TOPIC_RE_STR: &str = r"kt=([A-Za-z0-9!@#$%^:*<>,?/()_+=.{}\{}\-]*)(&|$|\s)";
|
||||||
|
const EXACT_SOURCE_RE_STR: &str = r"xs=((\w+)://[A-Za-z0-9!@#$%^:*<>,?/()_+=.{}\\-]*)(&|$|\s)";
|
||||||
|
const EXACT_LENGTH_RE_STR: &str = r"xl=(\d*)(&|$|\s)";
|
||||||
|
const WEB_SEED_RE_STR: &str = r"ws=([A-Za-z0-9!@#$%^:*<>,?/()_+=.{}\{}\-]*)(&|$|\s)";
|
||||||
|
const ACCEPTABLE_SOURCE_RE_STR: &str = r"as=((\w+)://[A-Za-z0-9!@#$%^:*<>,?/()_+=.{}\\-]*)(&|$|\s))";
|
||||||
|
const MANIFEST_TOPIC_RE_STR: &str = r"mt=((\w+)://[A-Za-z0-9!@#$%^:*<>,?/()_+=.{}\\-]*|urn:(sha1|btih|ed2k|aich|kzhash|md5|tree:tiger):([A-Fa-f0-9]+|[A-Za-z2-7]+))(&|$|\s))";
|
||||||
|
|
||||||
|
|
||||||
|
///# Intro
|
||||||
|
/// magnet-url-rs has the goal of, as you may have guessed, parsing the parts of magnets. It does
|
||||||
|
/// this using some relatively simple regexes. The crate is designed to be very simple and efficient,
|
||||||
|
/// with a lot of flexibility. It's also designed to be relatively easy to handle errors, and
|
||||||
|
/// modification of it's source is greatly encouraged through documentation and it's license.
|
||||||
|
///
|
||||||
|
/// ## How to use this crate
|
||||||
|
/// Parsing a magnet is very simple:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use magnet-url-rs:Magnet;
|
||||||
|
/// let magnet_url = Magnet::new("magnet:?xt=urn:btih:08ada5a7a6183aae1e09d831df6748d566095a10&dn=Sintel&tr=udp%3A%2F%2Fexplodie.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.empire-js.us%3A1337&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337&tr=wss%3A%2F%2Ftracker.btorrent.xyz&tr=wss%3A%2F%2Ftracker.fastcast.nz&tr=wss%3A%2F%2Ftracker.openwebtorrent.com&ws=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2F&xs=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2Fsintel.torrent");
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// This returns the Magnet struct, which is made up of the fields listed below this section. To
|
||||||
|
/// access one of these fields is also very simple:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use magnet-url-rs:Magnet;
|
||||||
|
/// let magnet_url = Magnet::new("magnet:?xt=urn:btih:08ada5a7a6183aae1e09d831df6748d566095a10&dn=Sintel&tr=udp%3A%2F%2Fexplodie.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.empire-js.us%3A1337&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337&tr=wss%3A%2F%2Ftracker.btorrent.xyz&tr=wss%3A%2F%2Ftracker.fastcast.nz&tr=wss%3A%2F%2Ftracker.openwebtorrent.com&ws=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2F&xs=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2Fsintel.torrent");
|
||||||
|
/// println!("{:?}", magnet_url.dn);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// If you'd like to modify parts of the magnet_url to customize it, that can be done as well!
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use magnet-url-rs:Magnet;
|
||||||
|
/// let mut magnet_url = Magnet::new("magnet:?xt=urn:btih:08ada5a7a6183aae1e09d831df6748d566095a10&dn=Sintel&tr=udp%3A%2F%2Fexplodie.org%3A6969&tr=udp%3A%2F%2Ftracker.coppersurfer.tk%3A6969&tr=udp%3A%2F%2Ftracker.empire-js.us%3A1337&tr=udp%3A%2F%2Ftracker.leechers-paradise.org%3A6969&tr=udp%3A%2F%2Ftracker.opentrackr.org%3A1337&tr=wss%3A%2F%2Ftracker.btorrent.xyz&tr=wss%3A%2F%2Ftracker.fastcast.nz&tr=wss%3A%2F%2Ftracker.openwebtorrent.com&ws=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2F&xs=https%3A%2F%2Fwebtorrent.io%2Ftorrents%2Fsintel.torrent");
|
||||||
|
/// println!("{:?}", magnet_url.dn);
|
||||||
|
/// magnet_url.dn = String::from("hello_world");
|
||||||
|
/// println!("{:?}", magnet_url.dn);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// In fact, you can construct your own magnet url as well, as long as you fill in all the
|
||||||
|
/// parameters!
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use magnet-url-rs:Magnet;
|
||||||
|
/// let magnet_url =
|
||||||
|
/// //Note, this magnet won't actually download, sorry :/
|
||||||
|
/// Magnet {
|
||||||
|
/// dn: "hello_world".to_string(),
|
||||||
|
/// hash_type: "sha1".to_string(),
|
||||||
|
/// xt: "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed".to_string(),
|
||||||
|
/// xl: 1234567890,
|
||||||
|
/// tr:
|
||||||
|
/// {
|
||||||
|
/// let mut tr_vec = Vec::new();
|
||||||
|
/// tr_vec.push("https://example.com/".to_string())
|
||||||
|
/// tr_vec
|
||||||
|
/// },
|
||||||
|
/// kt: "cool+stuff".to_string(),
|
||||||
|
/// ws: String::new(),
|
||||||
|
/// acceptable_source: String::new(),
|
||||||
|
/// mt: String::new(),
|
||||||
|
///
|
||||||
|
/// };
|
||||||
|
/// ```
|
||||||
|
pub struct Magnet {
|
||||||
|
///Display Name of the torrent
|
||||||
|
pub dn: String,
|
||||||
|
///type of hash used in the exact topic
|
||||||
|
pub hash_type: String,
|
||||||
|
///eXact Topic: URN containing the file hash. The URN is specific to the protocol so a file hash
|
||||||
|
/// URN under btih (BitTorrent) would be completely different than the file hash URN for ed2k
|
||||||
|
pub xt: String,
|
||||||
|
///eXact Length: The size (in bytes)
|
||||||
|
///The length is isize instead of usize since it makes error handling easier, as -1 is given if
|
||||||
|
/// no length is set. I considered making it a String, but decided against it since it's simpler
|
||||||
|
/// for the developer when they can just deal with an integer
|
||||||
|
pub xl: isize,
|
||||||
|
///eXact Source: Either an HTTP (or HTTPS, FTP, FTPS, etc.) download source for the file pointed
|
||||||
|
/// to by the Magnet link, the address of a P2P source for the file or the address of a hub (in
|
||||||
|
/// the case of DC++), by which a client tries to connect directly, asking for the file and/or
|
||||||
|
/// its sources. This field is commonly used by P2P clients to store the source, and may include
|
||||||
|
/// the file hash.
|
||||||
|
pub xs: String,
|
||||||
|
///address TRacker: Tracker URL; used to obtain resources for BitTorrent downloads without a
|
||||||
|
/// need for DHT support. The value must be URL encoded
|
||||||
|
pub tr: Vec<String>,
|
||||||
|
///Keyword Topic: Specifies a string of search keywords to search for in P2P networks, rather
|
||||||
|
/// than a particular file. Also set as a vector since there will likely be more than one
|
||||||
|
pub kt: String,
|
||||||
|
///Web Seed: The payload data served over HTTP(S)
|
||||||
|
pub ws: String,
|
||||||
|
///Acceptable Source: Refers to a direct download from a web server. Regarded as only a
|
||||||
|
/// fall-back source in case a client is unable to locate and/or download the linked-to file in its supported P2P network(s)
|
||||||
|
///as is a reserved keyword in Rust, so unfortunately this library must use the full name
|
||||||
|
pub acceptable_source: String,
|
||||||
|
///Manifest Topic: Link to the metafile that contains a list of magneto (MAGMA –
|
||||||
|
/// MAGnet MAnifest); i.e. a link to a list of links
|
||||||
|
pub mt: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Magnet {
|
||||||
|
/**Given a magnet URL, identify the specific parts, and return the Magnet struct. If the program
|
||||||
|
can't identify a specific part of the magnet, then it will either give an empty version of what
|
||||||
|
its value would normally be (such as an empty string, an empty vector, or in the case of xl, -1)
|
||||||
|
*/
|
||||||
|
pub fn new (magnet_str: &str) -> Magnet {
|
||||||
|
lazy_static! {
|
||||||
|
static ref DISPLAY_NAME_RE: Regex = Regex::new(DISPLAY_NAME_RE_STR).unwrap();
|
||||||
|
static ref EXACT_TOPIC_RE: Regex = Regex::new(EXACT_TOPIC_RE_STR).unwrap();
|
||||||
|
static ref EXACT_LENGTH_RE: Regex = Regex::new(EXACT_LENGTH_RE_STR).unwrap();
|
||||||
|
static ref ADDRESS_TRACKER_RE: Regex = Regex::new(ADDRESS_TRACKER_RE_STR).unwrap();
|
||||||
|
static ref KEYWORD_TOPIC_RE: Regex = Regex::new(KEYWORD_TOPIC_RE_STR).unwrap();
|
||||||
|
static ref EXACT_SOURCE_RE: Regex = Regex::new(EXACT_SOURCE_RE_STR).unwrap();
|
||||||
|
static ref WEB_SEED_RE: Regex = Regex::new(WEB_SEED_RE_STR).unwrap();
|
||||||
|
static ref ACCEPTABLE_SOURCE_RE: Regex = Regex::new(ACCEPTABLE_SOURCE_RE_STR).unwrap();
|
||||||
|
static ref MANIFEST_TOPIC_RE: Regex = Regex::new(MANIFEST_TOPIC_RE_STR).unwrap();
|
||||||
|
static ref MAGNET_RE: Regex = Regex::new(MAGNET_RE_STR).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let validate_regex = |regex: &Regex, re_group_index| -> String {
|
||||||
|
match regex.captures(magnet_str) {
|
||||||
|
Some(m) => m.get(re_group_index).map_or("", |m| m.as_str()).to_string(),
|
||||||
|
None => String::new()
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
Magnet {
|
||||||
|
dn: validate_regex(&DISPLAY_NAME_RE, 1),
|
||||||
|
hash_type: validate_regex(&EXACT_TOPIC_RE, 1),
|
||||||
|
xt: validate_regex(&EXACT_TOPIC_RE, 2),
|
||||||
|
xl: validate_regex(&EXACT_LENGTH_RE, 1).parse().unwrap_or(-1),
|
||||||
|
xs: validate_regex(&EXACT_SOURCE_RE, 1),
|
||||||
|
tr: {
|
||||||
|
let mut tr_vec: Vec<String> = Vec::new();
|
||||||
|
// Since tr is a vector, I can't just use the validate_regex function
|
||||||
|
if ADDRESS_TRACKER_RE.is_match(magnet_str) {
|
||||||
|
for tr in ADDRESS_TRACKER_RE.captures_iter(magnet_str) {
|
||||||
|
tr_vec.push(tr.get(1).map_or("", |m| m.as_str()).to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tr_vec
|
||||||
|
|
||||||
|
},
|
||||||
|
kt: validate_regex(&KEYWORD_TOPIC_RE, 1),
|
||||||
|
ws: validate_regex(&WEB_SEED_RE, 1),
|
||||||
|
acceptable_source: validate_regex(&ACCEPTABLE_SOURCE_RE, 1),
|
||||||
|
mt: validate_regex(&MANIFEST_TOPIC_RE, 1),
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue