< 返回我的博客

smile921 发表于 2024-03-20 16:50

Tags:nom;练习;

nom 练习尝试解析curl

通过动手用 nom 解析一条从浏览器抓取的 curl 命令来练习。

几天断断续续尝试,终于见到曙光了

目标:尝试解析下面的这段命令行

curl 'http://query.sse.com.cn/commonQuery.do?jsonCallBack=jsonpCallback89469743&sqlId=COMMON_SSE_SJ_GPSJ_CJGK_MRGK_C&PRODUCT_CODE=01%2C02%2C03%2C11%2C17&type=inParams&SEARCH_DATE=2024-03-18&_=1710914422498' \
  -H 'Accept: */*' \
  -H 'Accept-Language: en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7' \
  -H 'Cache-Control: no-cache' \
  -H 'Connection: keep-alive' \
  -H 'Cookie: gdp_user_id=gioenc-c2b256a9%2C5442%2C561b%2C9c02%2C71199e7e89g9; VISITED_MENU=%5B%228312%22%5D; ba17301551dcbaf9_gdp_session_id=2e27fee0-b184-4efa-a66f-f651e5be47e0; ba17301551dcbaf9_gdp_session_id_sent=2e27fee0-b184-4efa-a66f-f651e5be47e0; ba17301551dcbaf9_gdp_sequence_ids={%22globalKey%22:139%2C%22VISIT%22:4%2C%22PAGE%22:18%2C%22VIEW_CLICK%22:117%2C%22VIEW_CHANGE%22:3}' \
  -H 'Pragma: no-cache' \
  -H 'Referer: http://www.sse.com.cn/' \
  -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36' \
  --insecure

我的尝试练习


#[test]
fn it_works_curl_parse() {
    // The sample command lives next to this source file and is embedded at compile time.
    let cmd = include_str!("./api.rest");
    print!("{}", cmd);

    /// Borrowed view of the pieces of a `curl` command line.
    ///
    /// Every field is a slice of the original input; nothing is copied or URL-decoded.
    #[derive(Debug)]
    struct CurlCommand<'a> {
        /// The literal program name (`curl`).
        pub cmd: &'a str,
        /// The URL without its query string.
        pub url: &'a str,
        /// Query parameters in the order they appear, still percent-encoded.
        pub querys: Vec<(&'a str, &'a str)>,
        /// Header name -> value. A repeated header name keeps only the last value.
        pub headers: HashMap<&'a str, &'a str>,
        // pub body: &'a str,
        /// Whatever follows the last `-H` option (for example `--insecure`).
        pub remaining: &'a str,
    }

    /// Parses one `key=value` pair.
    ///
    /// The value runs up to the next `&` or, for the last pair, to the end of the
    /// input. The first draft used `take_until("&")` here, which always failed on
    /// the final pair and forced `parse_querys` to special-case it.
    fn parse_param(input: &str) -> IResult<&str, (&str, &str)> {
        let (input, key) = take_until("=")(input)?;
        let (input, _) = char('=')(input)?;
        let end = input.find('&').unwrap_or(input.len());
        let (val, rest) = input.split_at(end);
        Ok((rest, (key, val)))
    }

    /// Parses a query string (the part after `?`) into `key=value` pairs separated by `&`.
    fn parse_querys(input: &str) -> IResult<&str, Vec<(&str, &str)>> {
        separated_list0(char('&'), parse_param)(input)
    }

    /// Skips whitespace followed by any shell line-continuation backslashes.
    ///
    /// The newline after a backslash is left in place; the next option's leading
    /// whitespace skip consumes it.
    fn skip_continuation(input: &str) -> IResult<&str, ()> {
        let (input, _) = multispace0(input)?;
        let (input, _) = many0(char('\\'))(input)?;
        Ok((input, ()))
    }

    /// Parses one `-H 'Name: value'` option and returns `(name, value)`.
    ///
    /// The header text is taken from between the single quotes, so it no longer
    /// matters whether the option is followed by a backslash, a newline or nothing.
    /// The text is split on the first `:` only, which keeps values such as
    /// `http://...` intact; whitespace after that colon is dropped.
    fn parse_header(input: &str) -> IResult<&str, (&str, &str)> {
        let (input, _) = multispace0(input)?;
        let (input, _) = tag("-H")(input)?;
        let (input, _) = multispace0(input)?;
        let (input, header) = delimited(char('\''), take_until("'"), char('\''))(input)?;
        let (rest, key) = take_until(":")(header)?;
        let (rest, _) = char(':')(rest)?;
        let (val, _) = multispace0(rest)?;
        // TODO urldecode
        let (input, _) = skip_continuation(input)?;
        Ok((input, (key, val)))
    }

    /// Parses zero or more consecutive `-H` options into a map.
    ///
    /// Stops at the first thing that is not a `-H` option and returns the input
    /// from that point (including any whitespace in front of it).
    fn parse_headers(input: &str) -> IResult<&str, HashMap<&str, &str>> {
        let (input, pairs) = many0(parse_header)(input)?;
        Ok((input, pairs.into_iter().collect()))
    }

    /// Parses `curl '<url>' [-H '<header>']...` and returns the unparsed tail
    /// together with the parsed command.
    ///
    /// The query string is optional, and nothing is assumed about what follows
    /// the headers; the leftover text is exposed both as the parser remainder and
    /// as `CurlCommand::remaining`.
    fn parse(input: &str) -> IResult<&str, CurlCommand<'_>> {
        let (input, _) = multispace0(input)?;
        let (input, cmd) = tag("curl")(input)?;
        let (input, _) = multispace0(input)?;
        let (input, full_url) = delimited(char('\''), take_until("'"), char('\''))(input)?;
        let (url, querys) = match full_url.split_once('?') {
            Some((base, query)) => (base, parse_querys(query)?.1),
            None => (full_url, Vec::new()),
        };
        let (input, _) = skip_continuation(input)?;
        let (input, headers) = parse_headers(input)?;
        let (remaining, _) = multispace0(input)?;

        let res = CurlCommand {
            cmd,
            url,
            querys,
            headers,
            remaining,
        };

        Ok((remaining, res))
    }

    let cc = parse(cmd).unwrap().1;
    assert_eq!("curl", cc.cmd);
    assert_eq!("http://query.sse.com.cn/commonQuery.do", cc.url);
    assert_eq!(6, cc.querys.len());
    assert_eq!(8, cc.headers.len());
    println!(" . ");
    println!("remaing:{}", cc.remaining);

    println!("parsed querys {:?}", cc.querys);
    println!("parsed headers {:?}", cc.headers);

    // Regression checks for inputs the first draft rejected.
    assert_eq!(vec![("a", "1")], parse_querys("a=1").unwrap().1);
    assert_eq!(
        vec![("a", "1"), ("b", "")],
        parse_querys("a=1&b=").unwrap().1
    );
    let bare = parse("curl 'http://example.com/' -H 'Accept: */*'").unwrap().1;
    assert_eq!("http://example.com/", bare.url);
    assert!(bare.querys.is_empty());
    assert_eq!(Some(&"*/*"), bare.headers.get("Accept"));
    assert_eq!("", bare.remaining);
}

当然还需要继续改进

评论区

写评论
Unic 2024-10-19 00:38

我看着你的内容挺少的, 所以自己也去尝试写了一些, 发现, 其中还是有不少需要注意的呀... 不过,好在还是写完了, 我以为一个下午就能写完, 结果还是花了1整天... :(

我写好的在这里 nomcurl. :)

1 共 1 条评论, 1 页