RESTinio
multipart_body.hpp
Go to the documentation of this file.
1 /*
2  * RESTinio
3  */
4 
5 /*!
6  * @file
7  * @brief Various tools for working with multipart bodies.
8  *
9  * @since v.0.6.1
10  */
11 
12 #pragma once
13 
14 #include <restinio/helpers/string_algo.hpp>
15 #include <restinio/helpers/easy_parser.hpp>
16 #include <restinio/helpers/http_field_parsers/basics.hpp>
17 #include <restinio/helpers/http_field_parsers/content-type.hpp>
18 
19 #include <restinio/http_headers.hpp>
20 #include <restinio/request_handler.hpp>
21 #include <restinio/expected.hpp>
22 
23 #include <restinio/impl/string_caseless_compare.hpp>
24 
25 #include <restinio/utils/metaprogramming.hpp>
26 
27 #include <iostream>
28 
29 namespace restinio
30 {
31 
32 namespace multipart_body
33 {
34 
35 //
36 // split_multipart_body
37 //
38 /*!
39  * @brief Helper function for splitting a multipart body into a series of
40  * separate parts.
41  *
42  * @return A list of separate parts. This list will be empty if no parts
43  * are found or if there is some error in the body format (for example if
44  * some part is opened by @a boundary but is not closed properly).
45  *
46  * @note
47  * A user should extract the value of @a boundary from the Content-Type
48  * field and modify it in the proper way (two leading hyphens should be added to the
49  * value of "boundary" parameter) by him/herself. Helper function
50  * detect_boundary_for_multipart_body() can be used for that purpose.
51  *
52  * Usage example:
53  * @code
54  * using namespace restinio::multipart_body;
55  *
56  * const auto boundary = detect_boundary_for_multipart_body(
57  * req, "multipart", "form-data" );
58  * if( boundary )
59  * {
60  * const auto parts = split_multipart_body( req.body(), *boundary );
61  * for( restinio::string_view_t one_part : parts )
62  * {
63  * ... // Handling of a part.
64  * }
65  * }
66  * @endcode
67  *
68  * @since v.0.6.1
69  */
71 inline std::vector< string_view_t >
75 {
76  using namespace restinio::string_algo;
77 
80 
81  const string_view_t eol{ "\r\n" };
82  const string_view_t last_separator{ "--\r\n" };
83 
84  // Find the first boundary.
85  auto boundary_pos = body.find( boundary );
87  // There is no initial separator in the body.
88  return result;
89 
90  // The first boundary can be at the very beginning of the body or
91  // there should be CRLF before the initial boundary.
92  if( boundary_pos != 0u &&
93  (boundary_pos < eol.size() ||
94  body.substr( boundary_pos - eol.size(), eol.size() ) != eol) )
95  return result;
96 
99  // The start boundary is the last boundary.
100  return result;
101 
102  while( starts_with( remaining_body, eol ) )
103  {
105 
108  return result;
109 
110  // There should be CRLF before the next boundary.
111  if( boundary_pos < eol.size() ||
113  return result;
114 
117 
119  // Is this boundary the last one?
121  {
122  // Yes, our iteration can be stopped and we can return the result.
123  swap( tmp_result, result );
124  return result;
125  }
126  }
127 
128  // We didn't find the last boundary. Or some error encountered in the format
129  // of the body.
130  //
131  // Empty result should be returned.
132  return result;
133 }
134 
135 //
136 // parsed_part_t
137 //
138 /*!
139  * @brief A description of parsed content of one part of a multipart body.
140  *
141  * @since v.0.6.1
142  */
144 {
145  //! HTTP-fields local for that part.
146  /*!
147  * @note
148  * It can be empty if no HTTP-fields are found for that part.
149  */
151  //! The body of that part.
153 };
154 
155 namespace impl
156 {
157 
158 namespace parser_details
159 {
160 
161 using namespace restinio::http_field_parsers;
162 
163 namespace easy_parser = restinio::easy_parser;
164 
165 constexpr char CR = '\r';
166 constexpr char LF = '\n';
167 
168 //
169 // body_producer_t
170 //
171 /*!
172  * @brief A special producer that consumes the whole remaining
173  * content from the input stream.
174  *
175  * @attention
176  * This producer can be seen as a hack. It can't be used safely
177  * outside the context for which this producer was created. It's because
178  * body_producer_t doesn't shift the current position in the input
179  * stream.
180  *
181  * @since v.0.6.1
182  */
185 {
186  RESTINIO_NODISCARD
187  expected_t< string_view_t, easy_parser::parse_error_t >
188  try_parse( easy_parser::impl::source_t & from ) const noexcept
189  {
190  // Return the whole content from the current position.
191  return from.fragment( from.current_position() );
192  }
193 };
194 
195 //
196 // field_value_producer_t
197 //
198 /*!
199  * @brief A special producer that consumes the rest of the current
200  * line in the input stream until CR/LF will be found.
201  *
202  * @note
203  * CR and LF symbols are not consumed from the input stream.
204  *
205  * @since v.0.6.1
206  */
208  : public easy_parser::impl::producer_tag< std::string >
209 {
210  RESTINIO_NODISCARD
211  expected_t< std::string, easy_parser::parse_error_t >
213  {
215  auto ch = from.getch();
216  while( !ch.m_eof && ch.m_ch != CR && ch.m_ch != LF )
217  {
218  accumulator += ch.m_ch;
219  ch = from.getch();
220  }
221 
222  if( ch.m_eof )
226  } );
227 
228  // CR or LF symbol should be returned back.
229  from.putback();
230 
231  return { std::move(accumulator) };
232  }
233 };
234 
235 } /* namespace parser_details */
236 
237 //
238 // make_parser
239 //
240 /*!
241  * @brief A factory function for a parser of a part of multipart message.
242  *
243  * Handles the following rule:
244 @verbatim
245 part := *( token ':' OWS field-value CR LF ) CR LF body
246 @endverbatim
247  *
248  * Produces parsed_part_t instance.
249  *
250  * @since v.0.6.1
251  */
253 auto
255 {
256  using namespace parser_details;
257 
258  return produce< parsed_part_t >(
260  repeat( 0, N,
262  token_p() >> to_lower() >> custom_consumer(
263  [](auto & f, std::string && v) {
264  f.name(std::move(v));
265  } ),
266  symbol(':'),
267  ows(),
269  [](auto & f, std::string && v) {
270  f.value(std::move(v));
271  } ),
272  symbol(CR), symbol(LF)
273  ) >> custom_consumer(
274  [](auto & to, http_header_field_t && v) {
275  to.add_field( std::move(v) );
276  } )
277  )
278  ) >> &parsed_part_t::fields,
279  symbol(CR), symbol(LF),
281 }
282 
283 } /* namespace impl */
284 
285 //
286 // try_parse_part
287 //
288 /*!
289  * @brief Helper function for parsing content of one part of a multipart body.
290  *
291  * This function is intended to be used with split_multipart_body():
292  * @code
293  * using namespace restinio::multipart_body;
294  *
295  * const auto boundary = detect_boundary_for_multipart_body(
296  * req, "multipart", "form-data" );
297  * if( boundary )
298  * {
299  * const auto parts = split_multipart_body( req.body(), *boundary );
300  * for( restinio::string_view_t one_part : parts )
301  * {
302  * const auto parsed_part = try_parse_part( one_part );
303  * if( parsed_part )
304  * {
305  * ... // Handle the content of the parsed part.
306  * }
307  * }
308  * }
309  * @endcode
310  *
311  * @since v.0.6.1
312  */
313 RESTINIO_NODISCARD
316 {
317  namespace easy_parser = restinio::easy_parser;
318 
320 
321  auto actual_producer = impl::make_parser();
322 
323  return easy_parser::impl::top_level_clause_t< decltype(actual_producer) >{
325  }.try_process( source );
326 }
327 
328 //
329 // handling_result_t
330 //
331 /*!
332  * @brief The result to be returned from user-provided handler of
333  * parts of multipart body.
334  *
335  * @since v.0.6.1
336  */
338 {
339  //! Enumeration of parts should be continued.
340  //! If there is another part the user-provided handler will
341  //! be called for it.
343  //! Enumeration of parts should be stopped.
344  //! All remaining parts of multipart body will be skipped.
345  //! But the result of the enumeration will be successful.
347  //! Enumeration of parts should be ignored.
348  //! All remaining parts of multipart body will be skipped and
349  //! the result of the enumeration will be a failure.
351 };
352 
353 //
354 // enumeration_error_t
355 //
356 /*!
357  * @brief The result of an attempt to enumerate parts of a multipart body.
358  *
359  * @since v.0.6.1
360  */
362 {
363  //! Content-Type field is not found.
364  //! If Content-Type is absent there is no way to detect 'boundary'
365  //! parameter.
367  //! Unable to parse Content-Type field value.
369  //! Content-Type field value parsed but doesn't contain an appropriate
370  //! value. For example there can be media-type different from 'multipart'
371  //! or 'boundary' parameter can be absent.
373  //! Value of 'boundary' parameter is invalid (for example it contains
374  //! some illegal characters).
376  //! No parts of a multipart body actually found.
378  //! Enumeration of parts was aborted by user-provided handler.
379  //! This code is returned when user-provided handler returns
380  //! handling_result_t::terminate_enumeration.
382  //! Some unexpected error encountered during the enumeration.
384 };
385 
386 namespace impl
387 {
388 
390 {
391 
392 // From https://tools.ietf.org/html/rfc1521:
393 //
394 // boundary := 0*69<bchars> bcharsnospace
395 //
396 // bchars := bcharsnospace / " "
397 //
398 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /"_"
399 // / "," / "-" / "." / "/" / ":" / "=" / "?"
400 //
402 constexpr bool
404 {
405  return (ch >= '0' && ch <= '9') // DIGIT
406  || ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) // ALPHA
407  || ch == '\''
408  || ch == '('
409  || ch == ')'
410  || ch == '+'
411  || ch == '_'
412  || ch == ','
413  || ch == '-'
414  || ch == '.'
415  || ch == '/'
416  || ch == ':'
417  || ch == '='
418  || ch == '?';
419 }
420 
422 constexpr bool
423 is_bchar( char ch )
424 {
425  return is_bcharnospace(ch) || ch == ' ';
426 }
427 
428 } /* namespace boundary_value_checkers */
429 
430 } /* namespace impl */
431 
432 //
433 // check_boundary_value
434 //
435 /*!
436  * @brief A helper function for checking the validity of 'boundary' value.
437  *
438  * The allowed format for 'boundary' value is defined here:
439  * https://tools.ietf.org/html/rfc2046
440 @verbatim
441  boundary := 0*69<bchars> bcharsnospace
442 
443  bchars := bcharsnospace / " "
444 
445  bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
446  "+" / "_" / "," / "-" / "." /
447  "/" / ":" / "=" / "?"
448 @endverbatim
449  *
450  * @return enumeration_error_t::illegal_boundary_value if @a value has
451  * illegal value or an empty optional if there are no errors detected.
452  *
453  * @since v.0.6.1
454  */
458 {
459  using namespace impl::boundary_value_checkers;
460 
461  if( value.size() >= 1u && value.size() <= 70u )
462  {
463  const std::size_t last_index = value.size() - 1u;
464  for( std::size_t i = 0u; i != last_index; ++i )
465  if( !is_bchar( value[i] ) )
467 
468  if( !is_bcharnospace( value[ last_index ] ) )
470  }
471  else
473 
474  return nullopt;
475 }
476 
477 //
478 // detect_boundary_for_multipart_body
479 //
480 /*!
481  * @brief Helper function for parsing Content-Type field and extracting
482  * the value of 'boundary' parameter.
483  *
484  * It finds Content-Type field, then parses it, then checks the value
485  * of media-type, then finds 'boundary' parameter, then checks the validity
486  * of 'boundary' value and then adds two leading hyphens to the value of
487  * 'boundary' parameter.
488  *
489  * The returned value (if there is no error) can be used for splitting
490  * a multipart body to separate parts.
491  *
492  * @since v.0.6.1
493  */
494 template< typename Extra_Data >
495 RESTINIO_NODISCARD
496 expected_t< std::string, enumeration_error_t >
498  const generic_request_t< Extra_Data > & req,
501 {
502  namespace hfp = restinio::http_field_parsers;
504 
505  // Content-Type header field should be present.
506  const auto content_type = req.header().opt_value_of(
508  if( !content_type )
509  return make_unexpected(
511 
512  // Content-Type field should be successfully parsed and should
513  // contain a value that corresponds to the expected media-type.
515  *content_type );
516  if( !parse_result )
517  return make_unexpected(
519 
520  const auto & media_type = parse_result->media_type;
522  {
523  return make_unexpected(
525  }
528  {
529  return make_unexpected(
531  }
532 
533  // `boundary` param should be present in parsed Content-Type value.
534  const auto boundary = hfp::find_first(
536  "boundary" );
537  if( !boundary )
538  return make_unexpected(
540 
541  // `boundary` should have valid value.
545 
546  // Actual value of boundary mark can be created.
551 
552  return { std::move(actual_boundary_mark) };
553 }
554 
555 namespace impl
556 {
557 
558 /*!
559  * @brief A function that parses every part of a multipart body and
560  * calls a user-provided handler for every parsed part.
561  *
562  * @return the count of parts successfully handled by @a handler or
563  * error code in the case if some error is detected.
564  *
565  * @since v.0.6.1
566  */
567 template< typename Handler >
568 RESTINIO_NODISCARD
569 expected_t< std::size_t, enumeration_error_t >
571  const std::vector< string_view_t > & parts,
572  Handler && handler )
573 {
574  std::size_t parts_processed{ 0u };
576 
577  for( auto current_part : parts )
578  {
579  // The current part should be parsed to headers and the body.
581  if( !part_parse_result )
583 
584  // NOTE: parsed_part is passed as rvalue reference!
587 
589  ++parts_processed;
590  else
592 
594  break;
595  }
596 
597  if( error )
598  return make_unexpected( *error );
599 
600  return parts_processed;
601 }
602 
603 //
604 // valid_handler_type
605 //
606 template< typename, typename = restinio::utils::metaprogramming::void_t<> >
607 struct valid_handler_type : public std::false_type {};
608 
609 template< typename T >
611  T,
613  std::enable_if_t<
614  std::is_same<
616  decltype(std::declval<T>()(std::declval<parsed_part_t>()))
617  >::value,
618  bool
619  >
620  >
621  > : public std::true_type
622 {};
623 
624 } /* namespace impl */
625 
626 //
627 // enumerate_parts
628 //
629 /*!
630  * @brief A helper function for enumeration of parts of a multipart body.
631  *
632  * This function:
633  *
634  * - finds Content-Type field for @a req;
635  * - parses Content-Type field, checks the media-type and extracts
636  * the value of 'boundary' parameter. The extracted 'boundary'
637  * parameter is checked for validity;
638  * - splits the body of @a req using value of 'boundary' parameter;
639  * - enumerates every part of body, parses every part and calls
640  * @a handler for every parsed part.
641  *
642  * Enumeration stops if @a handler returns handling_result_t::stop_enumeration
643  * or handling_result_t::terminate_enumeration. If @a handler returns
644  * handling_result_t::terminate_enumeration the enumerate_parts() returns
645  * enumeration_error_t::terminated_by_handler error code.
646  *
647  * A handler passed as @a handler argument should be a function or
648  * lambda/functor with one of the following formats:
649  * @code
650  * handling_result_t(parsed_part_t part);
651  * handling_result_t(parsed_part_t && part);
652  * handling_result_t(const parsed_part_t & part);
653  * @endcode
654  * Note that enumerate_parts() passes parsed_part_t instance to
655  * @a handler as rvalue reference. And this reference will be invalidated
656  * after the return from @a handler.
657  *
658  * Usage example:
659  * @code
660  * auto on_post(const restinio::request_handle_t & req) {
661  * using namespace restinio::multipart_body;
662  * const auto result = enumerate_parts( *req,
663  * [](parsed_part_t part) {
664  * ... // Some actions with the current part.
665  * return handling_result_t::continue_enumeration;
666  * },
667  * "multipart", "form-data" );
668  * if(result) {
669  * ... // Producing positive response.
670  * }
671  * else {
672  * ... // Producing negative response.
673  * }
674  * return restinio::request_accepted();
675  * }
676  * @endcode
677  *
678  * @return the count of parts successfully handled by @a handler or
679  * error code in the case if some error is detected.
680  *
681  * @since v.0.6.1
682  */
683 template< typename User_Type, typename Handler >
684 RESTINIO_NODISCARD
685 expected_t< std::size_t, enumeration_error_t >
687  //! The request to be handled.
688  const generic_request_t< User_Type > & req,
689  //! The handler to be called for every parsed part.
690  Handler && handler,
691  //! The expected value of 'type' part of 'media-type' from Content-Type.
692  //! If 'type' part is not equal to @a expected_media_type then
693  //! enumeration won't be performed.
694  //!
695  //! @note
696  //! The special value '*' is not handled here.
698  //! The optional expected value of 'subtype' part of 'media-type'
699  //! from Content-Type. If @a expected_media_subtype is specified and
700  //! does not match the 'subtype' part then enumeration won't be performed.
701  //!
702  //! @note
703  //! The special value '*' is not handled here.
705 {
706  static_assert(
708  "Handler should be callable object, "
709  "should accept parsed_part_t by value, const or rvalue reference, "
710  "and should return handling_result_t" );
711 
713  req,
716  if( boundary )
717  {
718  const auto parts = split_multipart_body( req.body(), *boundary );
719 
720  if( parts.empty() )
721  return make_unexpected(
723 
725  parts,
726  std::forward<Handler>(handler) );
727  }
728 
729  return make_unexpected( boundary.error() );
730 }
731 
732 } /* namespace multipart_body */
733 
734 } /* namespace restinio */
RESTINIO_NODISCARD constexpr bool is_bchar(char ch)
Enumeration of parts should be ignored. All remaining parts of multipart body will be skipped and the...
A description of parsed content of one part of a multipart body.
RESTINIO_NODISCARD expected_t< string_view_t, easy_parser::parse_error_t > try_parse(easy_parser::impl::source_t &from) const noexcept
RESTINIO_NODISCARD expected_t< std::size_t, enumeration_error_t > enumerate_parts_of_request_body(const std::vector< string_view_t > &parts, Handler &&handler)
A function that parses every part of a multipart body and calls a user-provided handler for every par...
Value of 'boundary' parameter is invalid (for example it contains some illegal characters).
Content-Type field is not found. If Content-Type is absent there is no way to detect 'boundary' param...
enumeration_error_t
The result of an attempt to enumerate parts of a multipart body.
string_view_t body
The body of that part.
No parts of a multipart body actually found.
Enumeration of parts was aborted by user-provided handler. This code is returned when user-provided h...
Content-Type field value parsed but doesn't contain an appropriate value. For example there can be me...
http_header_fields_t fields
HTTP-fields local for that part.
RESTINIO_NODISCARD expected_t< std::string, easy_parser::parse_error_t > try_parse(easy_parser::impl::source_t &from) const
Some unexpected error encountered during the enumeration.
std::enable_if< std::is_same< Parameter_Container, query_string_params_t >::value||std::is_same< Parameter_Container, router::route_params_t >::value, optional_t< Value_Type > >::type opt_value(const Parameter_Container &params, string_view_t key)
Gets the value of a parameter specified by key wrapped in optional_t<Value_Type> if parameter exists ...
Definition: value_or.hpp:64