mirror of
https://github.com/xcat2/xNBA.git
synced 2024-12-14 07:11:32 +00:00
Added a URI parser that should be standards conformant. (It can certainly
handle something as convoluted as ftp://joe:secret@insecure.org:8081/hidden/path/to?what=is#this)
This commit is contained in:
parent
a5a330339b
commit
26166cf5e0
161
src/core/uri.c
Normal file
161
src/core/uri.c
Normal file
@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
/** @file
|
||||
*
|
||||
* Uniform Resource Identifiers
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <gpxe/uri.h>
|
||||
|
||||
/**
|
||||
* Parse URI
|
||||
*
|
||||
* @v uri_string URI as a string
|
||||
* @ret uri URI
|
||||
*
|
||||
* Splits a URI into its component parts. The return URI structure is
|
||||
* dynamically allocated and must eventually be freed by calling
|
||||
* free_uri().
|
||||
*/
|
||||
struct uri * parse_uri ( const char *uri_string ) {
|
||||
struct uri *uri;
|
||||
char *raw;
|
||||
char *tmp;
|
||||
char *path = NULL;
|
||||
char *authority = NULL;
|
||||
size_t raw_len;
|
||||
|
||||
/* Allocate space for URI struct and a copy of the string */
|
||||
raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
|
||||
uri = malloc ( sizeof ( *uri ) + raw_len );
|
||||
if ( ! uri )
|
||||
return NULL;
|
||||
raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
|
||||
|
||||
/* Zero URI struct and copy in the raw string */
|
||||
memset ( uri, 0, sizeof ( *uri ) );
|
||||
memcpy ( raw, uri_string, raw_len );
|
||||
|
||||
/* Start by chopping off the fragment, if it exists */
|
||||
if ( ( tmp = strchr ( raw, '#' ) ) ) {
|
||||
*(tmp++) = '\0';
|
||||
uri->fragment = tmp;
|
||||
}
|
||||
|
||||
/* Identify absolute/relative URI */
|
||||
if ( ( tmp = strchr ( raw, ':' ) ) ) {
|
||||
/* Absolute URI: identify hierarchical/opaque */
|
||||
uri->scheme = raw;
|
||||
*(tmp++) = '\0';
|
||||
if ( *tmp == '/' ) {
|
||||
/* Absolute URI with hierarchical part */
|
||||
path = tmp;
|
||||
} else {
|
||||
/* Absolute URI with opaque part */
|
||||
uri->opaque = tmp;
|
||||
}
|
||||
} else {
|
||||
/* Relative URI */
|
||||
path = raw;
|
||||
}
|
||||
|
||||
/* If we don't have a path (i.e. we have an absolute URI with
|
||||
* an opaque portion, we're already finished processing
|
||||
*/
|
||||
if ( ! path )
|
||||
goto done;
|
||||
|
||||
/* Chop off the query, if it exists */
|
||||
if ( ( tmp = strchr ( path, '?' ) ) ) {
|
||||
*(tmp++) = '\0';
|
||||
uri->query = tmp;
|
||||
}
|
||||
|
||||
/* Identify net/absolute/relative path */
|
||||
if ( strncmp ( path, "//", 2 ) == 0 ) {
|
||||
/* Net path. If this is terminated by the first '/'
|
||||
* of an absolute path, then we have no space for a
|
||||
* terminator after the authority field, so shuffle
|
||||
* the authority down by one byte, overwriting one of
|
||||
* the two slashes.
|
||||
*/
|
||||
authority = ( path + 2 );
|
||||
if ( ( tmp = strchr ( authority, '/' ) ) ) {
|
||||
/* Shuffle down */
|
||||
uri->path = tmp;
|
||||
memmove ( ( authority - 1 ), authority,
|
||||
( tmp - authority ) );
|
||||
authority--;
|
||||
*(--tmp) = '\0';
|
||||
}
|
||||
} else {
|
||||
/* Absolute/relative path */
|
||||
uri->path = path;
|
||||
}
|
||||
|
||||
/* Split authority into user[:password] and host[:port] portions */
|
||||
if ( ( tmp = strchr ( authority, '@' ) ) ) {
|
||||
/* Has user[:password] */
|
||||
*(tmp++) = '\0';
|
||||
uri->host = tmp;
|
||||
uri->user = authority;
|
||||
if ( ( tmp = strchr ( authority, ':' ) ) ) {
|
||||
/* Has password */
|
||||
*(tmp++) = '\0';
|
||||
uri->password = tmp;
|
||||
}
|
||||
} else {
|
||||
/* No user:password */
|
||||
uri->host = authority;
|
||||
}
|
||||
|
||||
/* Split host into host[:port] */
|
||||
if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
|
||||
*(tmp++) = '\0';
|
||||
uri->port = tmp;
|
||||
}
|
||||
|
||||
done:
|
||||
DBG ( "URI \"%s\" split into", raw );
|
||||
if ( uri->scheme )
|
||||
DBG ( " scheme \"%s\"", uri->scheme );
|
||||
if ( uri->opaque )
|
||||
DBG ( " opaque \"%s\"", uri->opaque );
|
||||
if ( uri->user )
|
||||
DBG ( " user \"%s\"", uri->user );
|
||||
if ( uri->password )
|
||||
DBG ( " password \"%s\"", uri->password );
|
||||
if ( uri->host )
|
||||
DBG ( " host \"%s\"", uri->host );
|
||||
if ( uri->port )
|
||||
DBG ( " port \"%s\"", uri->port );
|
||||
if ( uri->path )
|
||||
DBG ( " path \"%s\"", uri->path );
|
||||
if ( uri->query )
|
||||
DBG ( " query \"%s\"", uri->query );
|
||||
if ( uri->fragment )
|
||||
DBG ( " fragment \"%s\"", uri->fragment );
|
||||
DBG ( "\n" );
|
||||
|
||||
return uri;
|
||||
}
|
116
src/include/gpxe/uri.h
Normal file
116
src/include/gpxe/uri.h
Normal file
@ -0,0 +1,116 @@
|
||||
#ifndef _GPXE_URI_H
|
||||
#define _GPXE_URI_H
|
||||
|
||||
/** @file
|
||||
*
|
||||
* Uniform Resource Identifiers
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/** A Uniform Resource Identifier
 *
 * Field names follow the terminology of uri(7), with one exception:
 * for an absolute path, "path" includes the leading '/'.
 *
 * Every field is optional; a component that is not present in the
 * URI is represented by a NULL pointer.
 *
 * Worked examples:
 *
 *   http://www.etherboot.org/wiki :
 *
 *     scheme = "http", host = "www.etherboot.org", path = "/wiki"
 *
 *   /var/lib/tftpboot :
 *
 *     path = "/var/lib/tftpboot"
 *
 *   mailto:bob@nowhere.com :
 *
 *     scheme = "mailto", opaque = "bob@nowhere.com"
 *
 *   ftp://joe:secret@insecure.org:8081/hidden/path/to?what=is#this
 *
 *     scheme = "ftp", user = "joe", password = "secret",
 *     host = "insecure.org", port = "8081", path = "/hidden/path/to",
 *     query = "what=is", fragment = "this"
 */
struct uri {
	/** Scheme (e.g. "http"); NULL for a relative URI */
	const char *scheme;
	/** Opaque part (e.g. "bob@nowhere.com" in a mailto: URI) */
	const char *opaque;
	/** User name */
	const char *user;
	/** Password */
	const char *password;
	/** Host name */
	const char *host;
	/** Port number, kept as a string */
	const char *port;
	/** Path, including the leading '/' if it is absolute */
	const char *path;
	/** Query (the text following '?') */
	const char *query;
	/** Fragment (the text following '#') */
	const char *fragment;
};
|
||||
|
||||
/**
|
||||
* URI is an absolute URI
|
||||
*
|
||||
* @v uri URI
|
||||
* @ret is_absolute URI is absolute
|
||||
*
|
||||
* An absolute URI begins with a scheme, e.g. "http:" or "mailto:".
|
||||
* Note that this is a separate concept from a URI with an absolute
|
||||
* path.
|
||||
*/
|
||||
static inline int uri_is_absolute ( struct uri *uri ) {
|
||||
return ( uri->scheme != NULL );
|
||||
}
|
||||
|
||||
/**
|
||||
* URI has an absolute path
|
||||
*
|
||||
* @v uri URI
|
||||
* @ret has_absolute_path URI has an absolute path
|
||||
*
|
||||
* An absolute path begins with a '/'. Note that this is a separate
|
||||
* concept from an absolute URI. Note also that a URI may not have a
|
||||
* path at all.
|
||||
*/
|
||||
static inline int uri_has_absolute_path ( struct uri *uri ) {
|
||||
return ( uri->path && ( uri->path[0] == '/' ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* URI has a relative path
|
||||
*
|
||||
* @v uri URI
|
||||
* @ret has_relative_path URI has a relative path
|
||||
*
|
||||
* An relative path begins with something other than a '/'. Note that
|
||||
* this is a separate concept from a relative URI. Note also that a
|
||||
* URI may not have a path at all.
|
||||
*/
|
||||
static inline int uri_has_relative_path ( struct uri *uri ) {
|
||||
return ( uri->path && ( uri->path[0] != '/' ) );
|
||||
}
|
||||
|
||||
/**
 * Free URI structure
 *
 * @v uri		URI
 *
 * Releases all of the dynamically-allocated storage belonging to the
 * URI structure with a single call to free().
 */
static inline void free_uri ( struct uri *uri ) {
	free ( uri );
}
|
||||
|
||||
extern struct uri * parse_uri ( const char *uri_string );
|
||||
|
||||
#endif /* _GPXE_URI_H */
|
Loading…
Reference in New Issue
Block a user