一直想写一个爬虫,却不知道从何入手。在网上找到一个用 C 语言下载网页的程序,先保存在这里,方便以后直接查看。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
/* Name of the web server to contact. It points at a string literal, so it
 * is declared const: the characters must never be written through it. */
const char *host = "www.hao123.com";
/* TCP port to connect to (80 is the default HTTP port). */
int port = 80;
/*
 * Write a minimal HTTP/1.1 "GET /" request for `hostname` into `out`.
 *
 * out      - destination buffer
 * cap      - size of `out` in bytes
 * hostname - value placed in the Host header
 *
 * Returns 0 on success, -1 if formatting failed or the request did not fit.
 * The header section is terminated by exactly one empty line.
 */
static int build_request(char *out, size_t cap, const char *hostname)
{
    int written = snprintf(out, cap,
                           "GET / HTTP/1.1\r\n"
                           "Host:%s\r\n"
                           "Accept: */*\r\n"
                           "User-Agent: Mozilla/4.0(compatible)\r\n"
                           "connection:Keep-Alive\r\n"
                           "\r\n",
                           hostname);

    if (written < 0 || (size_t)written >= cap) {
        return -1;
    }
    return 0;
}

/*
 * Send all `len` bytes of `data` on socket `fd`, retrying after short writes.
 *
 * Returns 0 once everything has been handed to send(), -1 on the first
 * call that fails or makes no progress.
 */
static int send_all(int fd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t sent = send(fd, data, len, 0);

        if (sent <= 0) {
            return -1;
        }
        data += sent;
        len -= (size_t)sent;
    }
    return 0;
}

/*
 * Download the front page of `host` over plain HTTP and copy it to stdout.
 *
 * Steps: resolve `host`, connect to `port`, send a GET request (echoed to
 * stdout first), print the response headers byte by byte until the empty
 * line that ends them, then stream the body to stdout.
 *
 * Diagnostics go to stderr so they do not mix with the downloaded page.
 * Returns EXIT_SUCCESS once the request has been sent and the response has
 * been read until end of stream, a receive error or the receive timeout;
 * returns EXIT_FAILURE if any earlier step fails or stdout cannot be written.
 */
int main(void)
{
    char request[512];
    char buffer[512];
    struct sockaddr_in pin;
    struct hostent *remote;
    struct timeval timeout = {1, 0}; /* receive timeout: 1 second, 0 microseconds */
    size_t line_len = 0;             /* non-CR characters seen on the current header line */
    int headers_done = 0;
    int isock = -1;
    int status = EXIT_FAILURE;

    remote = gethostbyname(host);
    if (remote == NULL || remote->h_addrtype != AF_INET ||
        remote->h_addr_list[0] == NULL ||
        (size_t)remote->h_length != sizeof pin.sin_addr) {
        fprintf(stderr, "Error resolving host\n");
        return EXIT_FAILURE;
    }

    memset(&pin, 0, sizeof pin);
    pin.sin_family = AF_INET;
    pin.sin_port = htons((unsigned short)port);
    /* Copy the address bytes instead of casting the char pointer, which
     * avoids alignment and aliasing assumptions. */
    memcpy(&pin.sin_addr, remote->h_addr_list[0], sizeof pin.sin_addr);

    if (build_request(request, sizeof request, host) != 0) {
        fprintf(stderr, "Error building request\n");
        return EXIT_FAILURE;
    }

    isock = socket(AF_INET, SOCK_STREAM, 0);
    if (isock == -1) {
        fprintf(stderr, "Error opening socket!\n");
        return EXIT_FAILURE;
    }

    /* Echo the outgoing request so the user can see what is sent. */
    fputs(request, stdout);

    if (connect(isock, (const struct sockaddr *)&pin, sizeof pin) == -1) {
        fprintf(stderr, "Error connecting to socket\n");
        goto out;
    }

    if (send_all(isock, request, strlen(request)) != 0) {
        fprintf(stderr, "Error in send\n");
        goto out;
    }

    /* The request asks for keep-alive, so the server may leave the
     * connection open after the body; the receive timeout is what ends the
     * read loops in that case. */
    if (setsockopt(isock, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) == -1) {
        fprintf(stderr, "Warning: could not set receive timeout\n");
    }

    /* Headers: read one byte at a time and stop after the first empty line.
     * A return of 0 (peer closed) or < 0 (error/timeout) also ends the loop,
     * and no byte is inspected or printed in that case. */
    while (!headers_done) {
        char ch;
        ssize_t got = recv(isock, &ch, 1, 0);

        if (got <= 0) {
            break;
        }
        if (ch == '\n') {
            if (line_len == 0) {
                headers_done = 1;
            }
            line_len = 0;
        } else if (ch != '\r') {
            line_len++;
        }
        putchar(ch);
    }

    /* Body: only attempted when the header section ended normally. fwrite
     * is used so that NUL bytes in the payload do not truncate the output. */
    while (headers_done) {
        ssize_t got = recv(isock, buffer, sizeof buffer, 0);

        if (got <= 0) {
            break;
        }
        if (fwrite(buffer, 1, (size_t)got, stdout) != (size_t)got) {
            fprintf(stderr, "Error writing output\n");
            goto out;
        }
    }

    status = EXIT_SUCCESS;

out:
    if (isock != -1) {
        close(isock);
    }
    return status;
}


Logo

更多推荐