libzypp 17.37.17
metalinkparser.cc
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
12
13#include "metalinkparser.h"
16#include <zypp-core/ByteArray.h>
18
19#include <stack>
20#include <vector>
21#include <algorithm>
22
23#include <libxml/SAX2.h>
24
25using namespace zypp::base;
26
27namespace zypp::env
28{
30 inline bool ZYPP_METALINK_DEBUG()
31 {
32 static bool val = [](){
33 const char * env = getenv("ZYPP_METALINK_DEBUG");
34 return( env && zypp::str::strToBool( env, true ) );
35 }();
36 return val;
37 }
38}
39
40namespace zypp::media {
61
  /// One entry of the parser's transition table: which state to enter when
  /// a given child element is opened, and whether its text should be kept.
  struct transition {
    std::string elementName; ///< Name of the element for the transition to trigger
    ParserState transitionTo; ///< The state we go into when the element name in \a elementName is encountered
    int docontent; ///< Store the content of the element in the \a content member
  };
67
73 const std::unordered_map<ParserState, std::vector<transition> > & transitions () {
74 static std::unordered_map<ParserState, std::vector<transition> > map {
75 { STATE_START, {
76 { "metalink", STATE_METALINK, 0},
77 }
78 },
80 { "files", STATE_FILES, 0 },
81 { "file", STATE_M4FILE, 0 },
82 }
83 },
84 { STATE_FILES, {
85 { "file", STATE_FILE, 0},
86 }
87 },
88 { STATE_FILE, {
89 { "size", STATE_SIZE, 1 },
90 { "verification", STATE_VERIFICATION, 0 },
91 { "resources", STATE_RESOURCES, 0 },
92 }
93 },
95 { "hash", STATE_HASH, 1 },
96 { "pieces", STATE_PIECES, 0 },
97 }
98 },
99 { STATE_PIECES, {
100 { "hash", STATE_PHASH, 1 },
101 }
102 },
103 { STATE_RESOURCES, {
104 { "url", STATE_URL, 1 },
105 }
106 },
107 { STATE_M4FILE, {
108 { "size", STATE_M4SIZE, 1 },
109 { "hash", STATE_M4HASH, 1},
110 { "url", STATE_M4URL, 1},
111 { "pieces", STATE_M4PIECES, 0},
112 }
113 },
114 { STATE_M4PIECES, {
115 { "hash", STATE_M4PHASH, 1 },
116 }
117 },
118 };
119
120 return map;
121 }
122
// Forward declarations of the libxml2 callbacks defined further down in this
// file. In every callback userData is cast back to the ml_parsedata instance.
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
static void XMLCALL endElement(void *userData, const xmlChar *name);
static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
static void XMLCALL parseError(void *userData, const xmlError *error);
127
130 : parser( nullptr )
131 , state( STATE_START )
132 , depth( 0 )
133 , statedepth( 0 )
134 , docontent( 0 )
135 , gotfile( 0 )
136 , size( -1 )
137 , blksize( 0 )
138 , piecel( 0 )
139 , chksuml( 0 )
140 {
141 content.reserve( 256 );
142
143 xmlSAXHandler sax;
144 memset(&sax, 0, sizeof(sax));
145 sax.startElement = startElement;
146 sax.endElement = endElement;
147 sax.characters = characterData;
148
149 //internally creates a copy of xmlSaxHandler, so having it as local variable is save
150 parser = AutoDispose<xmlParserCtxtPtr>( xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL), xmlFreeParserCtxt );
151#ifdef HAVE_LIBXML2_XMLCTXTSETERRORHANDLER
152 xmlCtxtSetErrorHandler ( parser, parseError, this );
153#else
154 xmlSetStructuredErrorFunc ( this, (xmlStructuredErrorFunc)parseError );
155#endif
156 }
157
159#ifndef HAVE_LIBXML2_XMLCTXTSETERRORHANDLER
160 xmlSetStructuredErrorFunc ( nullptr, nullptr );
161#endif
162 }
163
164 void doTransition ( const transition &t ) {
165 parentStates.push( state );
169 content.clear();
170 }
171
172 void popState () {
173 state = parentStates.top();
174 statedepth--;
175 parentStates.pop();
176
177 }
178
180
181 ParserState state; //< current state as defined in \ref stateswitch
182 std::stack<ParserState> parentStates;
183
184 int depth; //< current element depth of traversing the document elements
185
192
193 std::string content; //< content of the current element
194 int docontent; //< should the content of the current elem be parsed
195
197 off_t size;
198 std::vector<MetalinkMirror> urls;
199 size_t blksize;
200
201 std::vector<UByteArray> piece;
203
204 std::vector<UByteArray> sha1;
205 std::vector<UByteArray> zsync;
206
209
210 std::optional<filesystem::Pathname> _filename; // if the filename is known, we can find it here
211 std::exception_ptr _lastError; // if a error was encountered during XML parsing we remember it here
212};
213
218static const char *
219find_attr(const char *txt, const xmlChar **atts)
220{
221 if(!atts) {
222 return nullptr;
223 }
224
225 for (; *atts; atts += 2)
226 {
227 if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
228 return reinterpret_cast<const char*>(atts[1]);
229 }
230 return nullptr;
231}
232
233static void XMLCALL
234startElement(void *userData, const xmlChar *name, const xmlChar **atts)
235{
236 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
237
238 // if the current element depth does not match the expected depth for the current state we
239 // ignore the element and just increase the depth
240 if (pd->depth != pd->statedepth) {
241 pd->depth++;
242 return;
243 }
244 pd->depth++;
245
246 const auto &trMap = transitions();
247 const auto currStateTrs = trMap.find( pd->state );
248 if ( currStateTrs == trMap.end() )
249 return;
250
251 // check if the current element name is part of our transitions
252 auto foundTr = std::find_if( currStateTrs->second.begin(), currStateTrs->second.end(), [name]( const auto &tr ){
253 return tr.elementName == reinterpret_cast<const char *>(name);
254 });
255
256 if ( foundTr == currStateTrs->second.end() ) {
257 // we found no possible transition, ignore
258 return;
259 }
260
261 if ( ( foundTr->transitionTo == STATE_FILE || foundTr->transitionTo == STATE_M4FILE ) && pd->gotfile++)
262 return; /* ignore all but the first file */
263
264 // advance the state machine and prepare variables for the new state
265 pd->doTransition( *foundTr );
266
267 switch(pd->state)
268 {
269 case STATE_URL:
270 case STATE_M4URL:
271 {
272 const char *priority = find_attr("priority", atts);
273 const char *preference = find_attr("preference", atts);
274 const char *maxconnections = find_attr("maxconnections", atts);
275 int prio = 0;
276 auto &mirr = pd->urls.emplace_back();
277 if (priority)
278 prio = str::strtonum<int>(priority);
279 else if (preference)
280 prio = 101 - str::strtonum<int>(preference);
281 else
282 prio = 999999;
283 mirr.priority = prio;
284
285 if ( maxconnections )
286 mirr.maxConnections = str::strtonum<int>( maxconnections );
287
288 break;
289 }
290 case STATE_PIECES:
291 case STATE_M4PIECES:
292 {
293 const char *type = find_attr("type", atts);
294 const char *length = find_attr("length", atts);
295 size_t blksize = 0;
296
297 if (!type || !length)
298 {
299 pd->popState();
300 break;
301 }
302 blksize = str::strtonum<unsigned long>(length);
303 if (!blksize || (pd->blksize && pd->blksize != blksize))
304 {
305 pd->popState();
306 break;
307 }
308 pd->blksize = blksize;
309 pd->piece.clear();
310 if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
311 pd->piecel = 20;
312 else if (!strcmp(type, "zsync"))
313 pd->piecel = 4;
314 else
315 {
316 pd->popState();
317 break;
318 }
319 break;
320 }
321 case STATE_HASH:
322 case STATE_M4HASH:
323 {
324 const char *type = find_attr("type", atts);
325 if (!type)
326 type = "?";
327 if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
328 pd->chksuml = 20;
329 else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
330 pd->chksuml = 32;
331 else
332 {
333 pd->popState();
334 pd->docontent = 0;
335 }
336 break;
337 }
338 case STATE_PHASH:
339 case STATE_M4PHASH:
340 {
341 const char *piece = find_attr("piece", atts);
342 if ( pd->state == STATE_PHASH && (!piece || str::strtonum<uint>(piece) != pd->piece.size()) )
343 {
344 pd->popState();
345 }
346 break;
347 }
348 default:
349 break;
350 }
351}
352
353UByteArray hexstr2bytes( const std::string& str )
354{
355 return Digest::hexStringToUByteArray( str );
356}
357
358static void XMLCALL
359endElement(void *userData, const xmlChar *)
360{
361 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
362 //printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
363 if (pd->depth != pd->statedepth)
364 {
365 pd->depth--;
366 return;
367 }
368 switch (pd->state)
369 {
370 case STATE_SIZE:
371 case STATE_M4SIZE:
372 pd->size = (off_t)str::strtonum<off_t>(pd->content); //strtoull(pd->content, 0, 10);
373 break;
374 case STATE_HASH:
375 case STATE_M4HASH:
376 pd->chksum.clear();
377 pd->chksum = hexstr2bytes( pd->content );
378 if ( pd->content.length() != size_t(pd->chksuml) * 2 || !pd->chksum.size() )
379 {
380 pd->chksum.clear();
381 pd->chksuml = 0;
382 }
383 break;
384 case STATE_PHASH:
385 case STATE_M4PHASH: {
386 if ( pd->content.length() != size_t(pd->piecel) * 2 )
387 break;
388 UByteArray pieceHash = hexstr2bytes( pd->content );
389 if ( !pieceHash.size() )
390 pieceHash.resize( pd->piecel, 0 );
391 pd->piece.push_back( pieceHash );
392 break;
393 }
394 case STATE_PIECES:
395 case STATE_M4PIECES:
396 if (pd->piecel == 4)
397 pd->zsync = pd->piece;
398 else
399 pd->sha1 = pd->piece;
400
401 pd->piecel = 0;
402 pd->piece.clear();
403 break;
404 case STATE_URL:
405 case STATE_M4URL:
406 if ( pd->content.length() )
407 pd->urls.back().url = std::string(pd->content);
408 else
409 // without a actual URL the mirror is useless
410 pd->urls.pop_back();
411 break;
412 default:
413 break;
414 }
415
416 pd->depth--;
417 pd->popState();
418 pd->docontent = 0;
419}
420
421static void XMLCALL
422characterData(void *userData, const xmlChar *s, int len)
423{
424 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
425 if (!pd->docontent)
426 return;
427
428 if ( pd->content.length() + len + 1 > pd->content.capacity() )
429 pd->content.reserve( pd->content.capacity() + 256 );
430 pd->content.append( s, s+len );
431}
432
433static void XMLCALL parseError(void *userData, const xmlError *error)
434{
435 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
436 if (!pd)
437 return;
438
439 ERR << "Parse error in " << (pd->_filename ? pd->_filename->asString() : std::string("unknown filename")) << " : " << error->message << std::endl;
440
441 auto ex = parser::ParseException( str::Str() << "Parse error in " << (pd->_filename ? pd->_filename->asString() : std::string("unknown filename")) << " : " << error->message ) ;
442 if ( pd->_lastError )
443 ex.remember (pd->_lastError);
444 pd->_lastError = std::make_exception_ptr (ex);
445}
446
447
451
453{
454 delete pd;
455}
456
457void
459{
460 pd->_filename = filename;
461 zypp_defer {
462 pd->_filename.reset();
463 };
464 parse(InputStream(filename));
465}
466
467void
469{
470 char buf[4096];
471 if (!is.stream())
472 ZYPP_THROW(parser::ParseException("MetaLinkParser: no such file"));
473
474 pd->_lastError = {};
475 zypp_defer {
476 // clear the error when we leave this function
477 pd->_lastError = {};
478 };
479
480 while (is.stream().good())
481 {
482 is.stream().read(buf, sizeof(buf));
483 parseBytes(buf, is.stream().gcount());
484 }
485 parseEnd();
486 MIL << "Parsed " << pd->urls.size() << " mirrors from " << is.path() << std::endl;
487 if ( env::ZYPP_METALINK_DEBUG() ) {
488 for ( const auto &mirr : pd->urls )
489 DBG << "- " << mirr.priority << " " << mirr.url << std::endl;
490 }
491}
492
493void
494MetaLinkParser::parseBytes(const char *buf, size_t len)
495{
496 if (!len)
497 return;
498
499 if (xmlParseChunk(pd->parser, buf, len, 0)) {
500 if ( pd->_lastError )
501 ZYPP_RETHROW(pd->_lastError);
502 else
503 ZYPP_THROW(parser::ParseException("Parse Error"));
504 }
505}
506
507void
509{
510 if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
511 if ( pd->_lastError )
512 ZYPP_RETHROW(pd->_lastError);
513 else
514 ZYPP_THROW(parser::ParseException("Parse Error"));
515 }
516 if (pd->urls.size() ) {
517 stable_sort(pd->urls.begin(), pd->urls.end(), []( const auto &a, const auto &b ){
518 return a.priority < b.priority;
519 });
520 }
521}
522
523std::vector<Url>
525{
526 std::vector<Url> urls;
527 urls.reserve(pd->urls.size());
528 for ( const auto &mirr : pd->urls )
529 urls.push_back( mirr.url );
530 return urls;
531}
532
/// Returns the collected mirrors by reference into the parser's own state,
/// so the reference must not be used after the parser is destroyed.
const std::vector<MetalinkMirror> &MetaLinkParser::getMirrors() const
{
  return pd->urls;
}
537
539{
540 MediaBlockList bl(pd->size);
541 if (pd->chksuml == 20)
542 bl.setFileChecksum("SHA1", pd->chksuml, pd->chksum.data() );
543 else if (pd->chksuml == 32)
544 bl.setFileChecksum("SHA256", pd->chksuml, pd->chksum.data());
545 if (pd->size != off_t(-1) && pd->blksize)
546 {
547 size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
548 off_t off = 0;
549 size_t size = pd->blksize;
550 for ( size_t i = 0; i < nb; i++ )
551 {
552 if (i == nb - 1)
553 {
554 size = pd->size % pd->blksize;
555 if (!size)
556 size = pd->blksize;
557 }
558 size_t blkno = bl.addBlock(off, size);
559 if ( i < pd->sha1.size())
560 {
561 bl.setChecksum(blkno, "SHA1", 20, pd->sha1[i].data());
562 if ( i < pd->zsync.size())
563 {
564 unsigned char *p = pd->zsync[i].data();
565 bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
566 }
567 }
568 off += pd->blksize;
569 }
570 }
571 return bl;
572}
573
/// Returns the stored zsync piece hashes by reference into the parser's own
/// state.
const std::vector<UByteArray> &MetaLinkParser::getZsyncBlockHashes() const
{
  return pd->zsync;
}
578
/// Returns the stored sha1 piece hashes by reference into the parser's own
/// state.
const std::vector<UByteArray> &MetaLinkParser::getSHA1BlockHashes() const
{
  return pd->sha1;
}
583
584} // namespace zypp::media
Reference counted access to a Tp object calling a custom Dispose function when the last AutoDispose h...
Definition AutoDispose.h:95
Helper to create and pass std::istream.
Definition inputstream.h:57
std::istream & stream() const
The std::istream.
Definition inputstream.h:93
const Pathname & path() const
Path to the input file or empty if no file.
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
void setChecksum(size_t blkno, const std::string &cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
void parseEnd()
tells the parser that all chunks are now processed
struct ml_parsedata * pd
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
MediaBlockList getBlockList() const
return the block list from the parsed metalink data
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
const std::vector< UByteArray > & getSHA1BlockHashes() const
const std::vector< UByteArray > & getZsyncBlockHashes() const
const std::vector< MetalinkMirror > & getMirrors() const
return the mirrors from the parsed metalink data
std::vector< Url > getUrls() const
return the download urls from the parsed metalink data
unsigned short a
unsigned short b
String related utilities and Regular expression matching.
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition NonCopyable.h:26
Namespace intended to collect all environment variables we use.
Definition Env.h:25
bool ZYPP_METALINK_DEBUG()
Hack to circumvent the currently poor –root support.
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
static const char * find_attr(const char *txt, const xmlChar **atts)
Look up a xml attribute in the passed array atts.
static void XMLCALL endElement(void *userData, const xmlChar *name)
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
UByteArray hexstr2bytes(const std::string &str)
static void XMLCALL parseError(void *userData, const xmlError *error)
const std::unordered_map< ParserState, std::vector< transition > > & transitions()
bool strToBool(const C_Str &str, bool default_r)
Parse str into a bool depending on the default value.
Definition String.h:500
TInt strtonum(const C_Str &str)
Parsing numbers from string.
std::optional< filesystem::Pathname > _filename
std::vector< UByteArray > zsync
AutoDispose< xmlParserCtxtPtr > parser
void doTransition(const transition &t)
std::exception_ptr _lastError
std::vector< MetalinkMirror > urls
std::vector< UByteArray > piece
std::stack< ParserState > parentStates
std::vector< UByteArray > sha1
Convenient building of std::string via std::ostringstream Basically a std::ostringstream autoconverti...
Definition String.h:213
#define zypp_defer
#define ZYPP_RETHROW(EXCPT)
Drops a logline and rethrows, updating the CodeLocation.
Definition Exception.h:479
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition Exception.h:459
#define DBG
Definition Logger.h:99
#define MIL
Definition Logger.h:100
#define ERR
Definition Logger.h:102