Packet Sniffer Code in C Using Linux Sockets

Pkt Sniffer Code (Read pkts only) using AF_PACKET Linux Sockets

In the previous part we made a simple sniffer which created a raw socket and started receiving on it. But it had a few drawbacks:

1. Could sniff only incoming data.

2. Could sniff only TCP or UDP or ICMP or any one protocol packets at a time.

3. Provided IP frames, so Ethernet headers were not available.

In this article we are going to modify the same code to fix the above 3 drawbacks. However we shall not be using libpcap. This will

be done using pure Linux sockets. The difference is very small and is 2 lines :

Instead of:

sock_raw = socket(AF_INET , SOCK_RAW , IPPROTO_TCP);

We do :

sock_raw = socket( AF_PACKET , SOCK_RAW , htons(ETH_P_ALL)) ;
//Optional
//setsockopt(sock_raw , SOL_SOCKET , SO_BINDTODEVICE , "eth0" , strlen("eth0")+ 1 );

and we are done.

Now it will:

1. Sniff both incoming and outgoing traffic.

2. Sniff ALL ETHERNET FRAMES, which includes all kinds of IP packets and even more if there are any.

3. Provides the Ethernet headers too, which contain the mac addresses.

The setsockopt line is optional. If you use it, it's important to provide the correct interface name to setsockopt: eth0 in this case, and in most cases.

So maybe you would like to present the user with a list of available interfaces and allow them to choose the one to be sniffed.

AGAM NOTE: Can we use this method with the adapter in promiscuous mode and capture everything on the wire? Libpcap

will do this but can this method do it?

Here is the full source code:#include<netinet/in.h>#include<errno.h>#include<netdb.h>#include<stdio.h> //For standard things#include<stdlib.h> //malloc#include<string.h> //strlen

#include<netinet/ip_icmp.h> //Provides declarations for icmp header#include<netinet/udp.h> //Provides declarations for udp header#include<netinet/tcp.h> //Provides declarations for tcp header#include<netinet/ip.h> //Provides declarations for ip header#include<netinet/if_ether.h> //For ETH_P_ALL#include<net/ethernet.h> //For ether_header#include<sys/socket.h>#include<arpa/inet.h>#include<sys/ioctl.h>#include<sys/time.h>#include<sys/types.h>#include<unistd.h>

/* Packet handlers: each receives the captured frame and its length in bytes. */
void ProcessPacket(unsigned char *buffer, int size);
void print_ip_header(unsigned char *Buffer, int Size);
void print_tcp_packet(unsigned char *Buffer, int Size);
void print_udp_packet(unsigned char *Buffer, int Size);
void print_icmp_packet(unsigned char *Buffer, int Size);
void PrintData(unsigned char *data, int Size);

/* Stream that all packet details are written to. */
FILE *logfile;

/* Scratch socket addresses used to hold a packet's source and destination IP. */
struct sockaddr_in source;
struct sockaddr_in dest;

/* Running counters, one per protocol class, plus the overall frame count. */
int tcp = 0;
int udp = 0;
int icmp = 0;
int others = 0;
int igmp = 0;
int total = 0;

/* General-purpose file-scope loop indices. */
int i;
int j;

int main(){ int saddr_size , data_size; struct sockaddr saddr; unsigned char *buffer = (unsigned char *) malloc(65536); //Its Big! logfile=fopen("log.txt","w"); if(logfile==NULL) { printf("Unable to create log.txt file."); } printf("Starting...\n"); int sock_raw = socket( AF_PACKET , SOCK_RAW , htons(ETH_P_ALL)) ; //setsockopt(sock_raw , SOL_SOCKET , SO_BINDTODEVICE , "eth0" , strlen("eth0")+ 1 ); if(sock_raw < 0) { //Print the error with proper message perror("Socket Error"); return 1; } while(1) { saddr_size = sizeof saddr; //Receive a packet data_size = recvfrom(sock_raw , buffer , 65536 , 0 , &saddr , (socklen_t*) &saddr_size); if(data_size <0 ) { printf("Recvfrom error , failed to get packets\n"); return 1; } //Now process the packet ProcessPacket(buffer , data_size); } close(sock_raw); printf("Finished"); return 0;}

void ProcessPacket(unsigned char* buffer, int size){ //Get the IP Header part of this packet , excluding the ethernet header struct iphdr *iph = (struct iphdr*)(buffer + sizeof(struct ethhdr)); ++total; switch (iph->protocol) //Check the Protocol and do accordingly... { case 1: //ICMP Protocol ++icmp; print_icmp_packet( buffer , size); break; case 2: //IGMP Protocol ++igmp; break; case 6: //TCP Protocol ++tcp;

print_tcp_packet(buffer , size); break; case 17: //UDP Protocol ++udp; print_udp_packet(buffer , size); break; default: //Some Other Protocol like ARP etc. ++others; break; } printf("TCP : %d UDP : %d ICMP : %d IGMP : %d Others : %d Total : %d\r", tcp , udp , icmp , igmp , others , total);}

/*
 * Write the Ethernet header found at the start of Buffer to the log file:
 * destination MAC, source MAC and the protocol field.
 *
 * The protocol field is printed exactly as stored in the frame (no byte-order
 * conversion is applied).  Size is accepted for symmetry with the other
 * print routines and is not used.
 */
void print_ethernet_header(unsigned char* Buffer, int Size)
{
    const struct ethhdr *frame = (const struct ethhdr *)Buffer;
    const unsigned char *dst = frame->h_dest;
    const unsigned char *src = frame->h_source;

    (void)Size;

    fputs("\n", logfile);
    fputs("Ethernet Header\n", logfile);

    fprintf(logfile,
            " |-Destination Address : %.2X-%.2X-%.2X-%.2X-%.2X-%.2X \n",
            dst[0], dst[1], dst[2], dst[3], dst[4], dst[5]);

    fprintf(logfile,
            " |-Source Address : %.2X-%.2X-%.2X-%.2X-%.2X-%.2X \n",
            src[0], src[1], src[2], src[3], src[4], src[5]);

    fprintf(logfile, " |-Protocol : %u \n", (unsigned short)frame->h_proto);
}

/*
 * Write the Ethernet header and then the IPv4 header of a frame to the log.
 *
 * Buffer : start of the frame (Ethernet header first)
 * Size   : number of bytes captured; forwarded to print_ethernet_header()
 *
 * Side effect: the globals `source` and `dest` are overwritten with the
 * packet's source and destination IPv4 addresses.
 */
void print_ip_header(unsigned char* Buffer, int Size)
{
    print_ethernet_header(Buffer , Size);

    const struct iphdr *iph = (const struct iphdr *)(Buffer + sizeof(struct ethhdr));

    memset(&source, 0, sizeof(source));
    source.sin_addr.s_addr = iph->saddr;

    memset(&dest, 0, sizeof(dest));
    dest.sin_addr.s_addr = iph->daddr;

    fprintf(logfile , "\n");
    fprintf(logfile , "IP Header\n");
    fprintf(logfile , " |-IP Version : %u\n", (unsigned int)iph->version);
    fprintf(logfile , " |-IP Header Length : %u DWORDS or %u Bytes\n",
            (unsigned int)iph->ihl, ((unsigned int)(iph->ihl)) * 4);
    fprintf(logfile , " |-Type Of Service : %u\n", (unsigned int)iph->tos);
    fprintf(logfile , " |-IP Total Length : %d Bytes(Size of Packet)\n", ntohs(iph->tot_len));
    fprintf(logfile , " |-Identification : %d\n", ntohs(iph->id));
    /* The fragmentation flags are not printed; they are carried in iph->frag_off. */
    fprintf(logfile , " |-TTL : %u\n", (unsigned int)iph->ttl);
    fprintf(logfile , " |-Protocol : %u\n", (unsigned int)iph->protocol);
    fprintf(logfile , " |-Checksum : %d\n", ntohs(iph->check));
    /* inet_ntoa() returns a static buffer, so each address gets its own fprintf call. */
    fprintf(logfile , " |-Source IP : %s\n", inet_ntoa(source.sin_addr));
    fprintf(logfile , " |-Destination IP : %s\n", inet_ntoa(dest.sin_addr));
}

void print_tcp_packet(unsigned char* Buffer, int Size){ unsigned short iphdrlen; struct iphdr *iph = (struct iphdr *)( Buffer + sizeof(struct ethhdr) ); iphdrlen = iph->ihl*4; struct tcphdr *tcph=(struct tcphdr*)(Buffer + iphdrlen + sizeof(struct ethhdr)); int header_size = sizeof(struct ethhdr) + iphdrlen + tcph->doff*4; fprintf(logfile , "\n\n**********************TCP Packet*************************\n"); print_ip_header(Buffer,Size); fprintf(logfile , "\n"); fprintf(logfile , "TCP Header\n"); fprintf(logfile , " |-Source Port : %u\n",ntohs(tcph->source)); fprintf(logfile , " |-Destination Port : %u\n",ntohs(tcph->dest)); fprintf(logfile , " |-Sequence Number : %u\n",ntohl(tcph->seq)); fprintf(logfile , " |-Acknowledge Number : %u\n",ntohl(tcph->ack_seq)); fprintf(logfile , " |-Header Length : %d DWORDS or %d BYTES\n" , (unsigned int)tcph->doff,(unsigned int)tcph->doff*4); //fprintf(logfile , " |-CWR Flag : %d\n",(unsigned int)tcph->cwr); //fprintf(logfile , " |-ECN Flag : %d\n",(unsigned int)tcph->ece); fprintf(logfile , " |-Urgent Flag : %d\n",(unsigned int)tcph->urg); fprintf(logfile , " |-Acknowledgement Flag : %d\n",(unsigned int)tcph->ack); fprintf(logfile , " |-Push Flag : %d\n",(unsigned int)tcph->psh); fprintf(logfile , " |-Reset Flag : %d\n",(unsigned int)tcph->rst); fprintf(logfile , " |-Synchronise Flag : %d\n",(unsigned int)tcph->syn); fprintf(logfile , " |-Finish Flag : %d\n",(unsigned int)tcph->fin); fprintf(logfile , " |-Window : %d\n",ntohs(tcph->window)); fprintf(logfile , " |-Checksum : %d\n",ntohs(tcph->check)); fprintf(logfile , " |-Urgent Pointer : %d\n",tcph->urg_ptr); fprintf(logfile , "\n"); fprintf(logfile , " DATA Dump "); fprintf(logfile , "\n"); fprintf(logfile , "IP Header\n"); PrintData(Buffer,iphdrlen); fprintf(logfile , "TCP Header\n"); PrintData(Buffer+iphdrlen,tcph->doff*4); fprintf(logfile , "Data Payload\n"); PrintData(Buffer + header_size , Size - header_size ); fprintf(logfile , 
"\n###########################################################");}

/*
 * Write a full description of a UDP packet to the log: Ethernet and IP
 * headers, the decoded UDP header, and hex dumps of the IP header, the UDP
 * header and the payload.
 *
 * Buffer : start of the frame (Ethernet header first)
 * Size   : number of bytes captured
 *
 * Frames too short to contain the IP or UDP header are reported with a
 * one-line notice instead of being decoded.
 */
void print_udp_packet(unsigned char *Buffer , int Size)
{
    const int eth_len = (int)sizeof(struct ethhdr);
    unsigned char *ip_start = Buffer + eth_len;
    struct iphdr *iph = (struct iphdr *)ip_start;
    unsigned char *udp_start;
    struct udphdr *udph;
    int iphdrlen;
    int header_size;

    if (Size < eth_len + (int)sizeof *iph) {
        fprintf(logfile , "\n\n(Truncated UDP packet: %d bytes captured)\n", Size);
        return;
    }

    iphdrlen = iph->ihl * 4;
    udp_start = ip_start + iphdrlen;
    udph = (struct udphdr *)udp_start;

    /* sizeof *udph is the header size; sizeof udph would be the pointer size. */
    header_size = eth_len + iphdrlen + (int)sizeof *udph;

    if (Size < header_size) {
        fprintf(logfile , "\n\n(Truncated UDP packet: %d bytes captured)\n", Size);
        return;
    }

    fprintf(logfile , "\n\n**********************UDP Packet*************************\n");

    print_ip_header(Buffer, Size);

    fprintf(logfile , "\nUDP Header\n");
    fprintf(logfile , " |-Source Port : %d\n" , ntohs(udph->source));
    fprintf(logfile , " |-Destination Port : %d\n" , ntohs(udph->dest));
    fprintf(logfile , " |-UDP Length : %d\n" , ntohs(udph->len));
    fprintf(logfile , " |-UDP Checksum : %d\n" , ntohs(udph->check));
    fprintf(logfile , "\n");

    /* Each dump starts at its own header, i.e. after the preceding headers. */
    fprintf(logfile , "IP Header\n");
    PrintData(ip_start, iphdrlen);

    fprintf(logfile , "UDP Header\n");
    PrintData(udp_start, (int)sizeof *udph);

    fprintf(logfile , "Data Payload\n");
    PrintData(Buffer + header_size, Size - header_size);

    fprintf(logfile , "\n###########################################################");
}

/*
 * Write a full description of an ICMP packet to the log: Ethernet and IP
 * headers, the decoded ICMP header, and hex dumps of the IP header, the ICMP
 * header and the payload.
 *
 * Buffer : start of the frame (Ethernet header first)
 * Size   : number of bytes captured
 *
 * Frames too short to contain the IP or ICMP header are reported with a
 * one-line notice instead of being decoded.
 */
void print_icmp_packet(unsigned char* Buffer , int Size)
{
    const int eth_len = (int)sizeof(struct ethhdr);
    unsigned char *ip_start = Buffer + eth_len;
    struct iphdr *iph = (struct iphdr *)ip_start;
    unsigned char *icmp_start;
    struct icmphdr *icmph;
    int iphdrlen;
    int header_size;

    if (Size < eth_len + (int)sizeof *iph) {
        fprintf(logfile , "\n\n(Truncated ICMP packet: %d bytes captured)\n", Size);
        return;
    }

    iphdrlen = iph->ihl * 4;
    icmp_start = ip_start + iphdrlen;
    icmph = (struct icmphdr *)icmp_start;

    /* sizeof *icmph is the header size; sizeof icmph would be the pointer size. */
    header_size = eth_len + iphdrlen + (int)sizeof *icmph;

    if (Size < header_size) {
        fprintf(logfile , "\n\n(Truncated ICMP packet: %d bytes captured)\n", Size);
        return;
    }

    fprintf(logfile , "\n\n********************ICMP Packet**********************\n");

    print_ip_header(Buffer , Size);

    fprintf(logfile , "\n");
    fprintf(logfile , "ICMP Header\n");

    fprintf(logfile , " |-Type : %u", (unsigned int)(icmph->type));
    if (icmph->type == ICMP_TIME_EXCEEDED) {
        fprintf(logfile , " (TTL Expired)");
    } else if (icmph->type == ICMP_ECHOREPLY) {
        fprintf(logfile , " (ICMP Echo Reply)");
    }
    /* Terminate the Type line for every type, not only the annotated ones. */
    fprintf(logfile , "\n");

    fprintf(logfile , " |-Code : %u\n", (unsigned int)(icmph->code));
    fprintf(logfile , " |-Checksum : %d\n", ntohs(icmph->checksum));
    fprintf(logfile , "\n");

    /* Each dump starts at its own header, i.e. after the preceding headers. */
    fprintf(logfile , "IP Header\n");
    PrintData(ip_start, iphdrlen);

    fprintf(logfile , "ICMP Header\n");
    PrintData(icmp_start, (int)sizeof *icmph);

    fprintf(logfile , "Data Payload\n");
    PrintData(Buffer + header_size, Size - header_size);

    fprintf(logfile , "\n###########################################################");
}

void PrintData (unsigned char* data , int Size){ int i , j; for(i=0 ; i < Size ; i++) { if( i!=0 && i%16==0) //if one line of hex printing is complete... { fprintf(logfile , " "); for(j=i-16 ; j<i ; j++) { if(data[j]>=32 && data[j]<=128) fprintf(logfile , "%c",(unsigned char)data[j]); //if number or alphabet else fprintf(logfile , "."); //otherwise print a dot } fprintf(logfile , "\n"); } if(i%16==0) fprintf(logfile , " "); fprintf(logfile , " %02X",(unsigned int)data[i]); if( i==Size-1) //print the last spaces { for(j=0;j<15-i%16;j++) { fprintf(logfile , " "); //extra spaces } fprintf(logfile , " "); for(j=i-i%16 ; j<=i ; j++) { if(data[j]>=32 && data[j]<=128) { fprintf(logfile , "%c",(unsigned char)data[j]); } else { fprintf(logfile , "."); } } fprintf(logfile , "\n" ); } }}

The log file will look something like this:

|-Destination Port : 5222

|-Sequence Number : 78458457

|-Acknowledge Number : 2427066746

|-Header Length : 5 DWORDS or 20 BYTES

|-Urgent Flag : 0

|-Acknowledgement Flag : 1

|-Push Flag : 1

|-Reset Flag : 0

|-Synchronise Flag : 0

|-Finish Flag : 0

|-Window : 62920

|-Checksum : 21544

|-Urgent Pointer : 0

DATA Dump

IP Header

00 25 5E 1A 3D F1 00 1C C0 F8 79 EE 08 00 45 00 .%^.=.....y...E.

00 8D 33 42 ..3B

TCP Header

40 00 40 06 B3 80 C0 A8 01 06 4A 7D 47 7D 83 77 @.@..?....J}G}.w

14 66 04 AD .f..

Data Payload

17 03 01 00 60 A0 9C 5D 14 A1 25 AB CE 8B 7C EB ....`..]..%...|.

1A A4 43 A6 60 DD E8 6B 6E 43 C1 94 6A D2 25 23 ..C.`..knC..j.%#

03 98 59 67 1A 2C 07 D3 7E B2 B8 9F 83 38 4C 69 ..Yg.,..~....8Li

D3 3A 8E 0D 9E F0 6B CE 9E 6B F4 E1 BD 9E 50 53 .:....k..k....PS

6D F6 AB 11 05 D6 41 82 F0 03 0C A6 E2 48 2B 71 m.....A......H+q

16 81 FF 5B DF 50 D4 5B AD 90 04 5E 4C 94 E7 9B ...[.P.[...^L...

0B 72 7E 32 88 .r~2.

###########################################################

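In the above log we can see the Ethernet headers being printed. They show the source and destination MAC addresses along with the packet protocol. A protocol value of 8 means IP: the EtherType for IPv4 is 0x0800, and because the program prints the h_proto field exactly as stored in the frame (network byte order, without ntohs), a little-endian machine displays it as 0x0008, i.e. 8.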

Note:

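1. If you want to sniff only one protocol, pass its EtherType instead of ETH_P_ALL, for example for IP only:

sock_raw = socket( AF_PACKET , SOCK_RAW , htons(ETH_P_IP)) ;

Note that the protocol argument is a single EtherType, not a bit mask, so protocols cannot be combined with "|": htons(ETH_P_IP|ETH_P_ARP) evaluates to htons(0x0806) and would deliver ARP frames only. To sniff only IP and ARP packets, keep ETH_P_ALL and filter on the h_proto field of the Ethernet header, or open one socket per protocol.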

The complete list of protocols is found in /usr/include/linux/if_ether.h

1234567891011121314151617181920212223

/* * These are the defined Ethernet Protocol ID's. */

#define ETH_P_LOOP 0x0060 /* Ethernet Loopback packet */#define ETH_P_PUP 0x0200 /* Xerox PUP packet */#define ETH_P_PUPAT 0x0201 /* Xerox PUP Addr Trans packet */#define ETH_P_IP 0x0800 /* Internet Protocol packet */#define ETH_P_X25 0x0805 /* CCITT X.25 */#define ETH_P_ARP 0x0806 /* Address Resolution packet */#define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [NOT AN OFFICIALLY REGISTERED ID] */#define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */#define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */#define ETH_P_DEC 0x6000 /* DEC Assigned proto */#define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */#define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */#define ETH_P_DNA_RT 0x6003 /* DEC DNA Routing */#define ETH_P_LAT 0x6004 /* DEC LAT */#define ETH_P_DIAG 0x6005 /* DEC Diagnostics */#define ETH_P_CUST 0x6006 /* DEC Customer use */#define ETH_P_SCA 0x6007 /* DEC Systems Comms Arch */#define ETH_P_TEB 0x6558 /* Trans Ether Bridging */#define ETH_P_RARP 0x8035 /* Reverse Addr Res packet */

2425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576

#define ETH_P_ATALK 0x809B /* Appletalk DDP */#define ETH_P_AARP 0x80F3 /* Appletalk AARP */#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */#define ETH_P_IPX 0x8137 /* IPX over DIX */#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */#define ETH_P_PAUSE 0x8808 /* IEEE Pause frames. See 802.3 31B */#define ETH_P_SLOW 0x8809 /* Slow Protocol. See 802.3ad 43B */#define ETH_P_WCCP 0x883E /* Web-cache coordination protoc draft-wilson-wrec-wccp-v2-00.txt */#define ETH_P_PPP_DISC 0x8863 /* PPPoE discovery messages */#define ETH_P_PPP_SES 0x8864 /* PPPoE session messages */#define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */#define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */#define ETH_P_ATMMPOA 0x884c /* MultiProtocol Over ATM */#define ETH_P_LINK_CTL 0x886c /* HPNA, wlan link local tunnel */#define ETH_P_ATMFATE 0x8884 /* Frame-based ATM Transport * over Ethernet */#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */#define ETH_P_AOE 0x88A2 /* ATA over Ethernet */#define ETH_P_TIPC 0x88CA /* TIPC */#define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */#define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */#define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */#define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */

/* * Non DIX types. Won't clash for 1500 types. */

#define ETH_P_802_3 0x0001 /* Dummy type for 802.3 frames */#define ETH_P_AX25 0x0002 /* Dummy protocol id for AX.25 */#define ETH_P_ALL 0x0003 /* Every packet (be careful!!!) */#define ETH_P_802_2 0x0004 /* 802.2 frames */#define ETH_P_SNAP 0x0005 /* Internal only */#define ETH_P_DDCMP 0x0006 /* DEC DDCMP: Internal only */#define ETH_P_WAN_PPP 0x0007 /* Dummy type for WAN PPP frames*/#define ETH_P_PPP_MP 0x0008 /* Dummy type for PPP MP frames */#define ETH_P_LOCALTALK 0x0009 /* Localtalk pseudo type */#define ETH_P_CAN 0x000C /* Controller Area Network */#define ETH_P_PPPTALK 0x0010 /* Dummy type for Atalk over PPP*/#define ETH_P_TR_802_2 0x0011 /* 802.2 frames */#define ETH_P_MOBITEX 0x0015 /* Mobitex ([email protected]) */#define ETH_P_CONTROL 0x0016 /* Card specific control frames */#define ETH_P_IRDA 0x0017 /* Linux-IrDA */#define ETH_P_ECONET 0x0018 /* Acorn Econet */#define ETH_P_HDLC 0x0019 /* HDLC frames */#define ETH_P_ARCNET 0x001A /* 1A for ArcNet :-) */#define ETH_P_DSA 0x001B /* Distributed Switch Arch. */#define ETH_P_TRAILER 0x001C /* Trailer switch tagging */#define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */#define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */#define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */

Enjoy!!

Send an Eth frame using an AF_PACKET socket in C

Content

1 Objective
2 Background
3 Scenario
4 Method
  4.1 Overview
  4.2 Select the required EtherType
  4.3 Create the AF_PACKET socket
  4.4 Determine the index number of the Ethernet interface to be used
  4.5 Construct the destination address
  4.6 Send the Ethernet frame
  4.7 Send the frame (using sendto)
  4.8 Send the frame (using sendmsg)
5 Alternatives
  5.1 Using libpcap
  5.2 Using a raw socket
6 Further reading

Tested on

Debian (Lenny)

Ubuntu (Lucid, Trusty)

Objective

To send an arbitrary Ethernet frame using an AF_PACKET socket

Background

Ethernet is a link layer protocol. Most networking programs interact with the network stack at the transport layer or above, so have no need to deal with Ethernet frames directly, but there are some circumstances where interaction at a lower level may be necessary. These include:

implementation of Ethernet-based protocols that are not built in to the network stack, and production of malformed or otherwise non-standard frames for testing purposes.

Scenario

Suppose that you wish to send an ARP request for the IP address 192.168.0.83. The request is to be sent from interface eth0 to the broadcast MAC address.

(ARP is the Address Resolution Protocol. It is used when a host needs to send a datagram to a given IP address, but does not know which MAC address corresponds to that IP address.)

Method

Overview

The method described here has five steps:

1. Select the required EtherType.
2. Create the AF_PACKET socket.
3. Determine the index number of the Ethernet interface to be used.
4. Construct the destination address.
5. Send the Ethernet frame.

The following header files are used:

Header                  Used by
<errno.h>               errno
<string.h>              memcpy, memset, strerror, strlen
<arpa/inet.h>           in_addr_t, htons, inet_aton
<net/ethernet.h>        ETHER_ADDR_LEN, ETH_P_*
<net/if.h>              struct ifreq
<netinet/if_ether.h>    struct ether_arp
<netpacket/packet.h>    struct sockaddr_ll
<sys/ioctl.h>           SIOCGIFINDEX, ioctl
<sys/socket.h>          struct sockaddr, struct iovec, struct msghdr, AF_PACKET, SOCK_DGRAM, socket, sendto, sendmsg

AF_PACKET sockets are specific to Linux. Programs that make use of them need elevated privileges in order to run.

Setting SO_BROADCAST does not appear to be necessary when sending broadcast frames using an AF_PACKET socket. Some programs do so anyway, which is unlikely to be harmful, and could be considered a worthwhile hedge against any future change in behaviour.

Select the required EtherType

The EtherType of an Ethernet frame specifies the type of payload that it contains. There are several sources from which EtherTypes can be obtained:

The header file <linux/if_ether.h> provides constants for most commonly-used EtherTypes. Examples include ETH_P_IP for the Internet Protocol (0x0800), ETH_P_ARP for the Address Resolution Protocol (0x0806) and ETH_P_8021Q for IEEE 802.1Q VLAN tags (0x8100).

The IEEE maintains the definitive list of registered EtherTypes. A semi-official list is maintained by IANA.

The wildcard value ETH_P_ALL allows any EtherType to be received without using multiple sockets. This includes EtherTypes that are handled by the kernel, such as IP and ARP.

If you need an EtherType for experimental or private use then the values 0x88b5 and 0x88b6 have been reserved for that purpose.

Create the AF_PACKET socket

The socket that will be used to send the Ethernet frame should be created using the socket function. This takes three arguments:

- the domain (AF_PACKET for a packet socket);
- the socket type (SOCK_DGRAM if you want the Ethernet header to be constructed for you or SOCK_RAW if you want to construct it yourself); and
- the protocol (equal to the EtherType chosen above, converted to network byte order), which is used for filtering inbound packets.

In this instance the socket will be used for sending (and presumably also receiving) ARP requests, therefore the third argument should be set to htons(ETH_P_ARP) (or equivalently, htons(0x0806)). There is no need to construct a custom Ethernet header so the second argument should be set to SOCK_DGRAM:

int fd=socket(AF_PACKET,SOCK_DGRAM,htons(ETH_P_ARP));if (fd==-1) { die("%s",strerror(errno));}

Determine the index number of the Ethernet interface to be used

Network interfaces are usually identified by name in user-facing contexts, but for some low-level APIs like the one used here a number is used instead. You can obtain the index from the name by means of the ioctl command SIOCGIFINDEX:

struct ifreq ifr;size_t if_name_len=strlen(if_name);if (if_name_len<sizeof(ifr.ifr_name)) { memcpy(ifr.ifr_name,if_name,if_name_len); ifr.ifr_name[if_name_len]=0;} else { die("interface name is too long");}if (ioctl(fd,SIOCGIFINDEX,&ifr)==-1) { die("%s",strerror(errno));}int ifindex=ifr.ifr_ifindex;

For further details of this method see the microHOWTO Get the index number of a Linux network interface in C using SIOCGIFINDEX.

Construct the destination address

To send a frame using an AF_PACKET socket its destination must be given in the form of a sockaddr_ll structure. The fields that you need to specify

are sll_family, sll_addr, sll_halen, sll_ifindex and sll_protocol. The remainder should be zeroed:

const unsigned char ether_broadcast_addr[]= {0xff,0xff,0xff,0xff,0xff,0xff};

struct sockaddr_ll addr={0};addr.sll_family=AF_PACKET;addr.sll_ifindex=ifindex;addr.sll_halen=ETHER_ADDR_LEN;addr.sll_protocol=htons(ETH_P_ARP);memcpy(addr.sll_addr,ether_broadcast_addr,ETHER_ADDR_LEN);

(At the time of writing, the manpage packet(7) stated that only sll_family, sll_addr, sll_halen and sll_ifindex need be provided when sending. This is incorrect. The EtherType specified when opening the socket is used for filtering inbound packets but not for constructing outbound ones.)

Send the Ethernet frame

Frames can in principle be sent using any function that is capable of writing to a file descriptor, however if you have opted for the link-layer header to be constructed automatically then it will be necessary to use either sendto or sendmsg so that a destination address can be specified. Of these sendmsg is the more flexible option, but at the cost of a significantly more complex interface. Details of each function are given below.

Regardless of which function you choose, each function call will result in a separate datagram being sent. For this reason you must either compose each datagram payload as a single, contiguous block of memory, or make use of the scatter/gather capability provided by sendmsg.

In this particular scenario the payload to be sent is an ARP request. For completeness, here is an example of how such a payload might be constructed:

struct ether_arp req;req.arp_hrd=htons(ARPHRD_ETHER);req.arp_pro=htons(ETH_P_IP);req.arp_hln=ETHER_ADDR_LEN;req.arp_pln=sizeof(in_addr_t);req.arp_op=htons(ARPOP_REQUEST);memset(&req.arp_tha,0,sizeof(req.arp_tha));

You will need to set req.arp_tpa to contain the IP address (in network byte order) for which you want to find the corresponding MAC address. For example, starting from a string in dotted quad format:

const char* target_ip_string="192.168.0.83";struct in_addr target_ip_addr={0};if (!inet_aton(target_ip_string,&target_ip_addr)) { die("%s is not a valid IP address",target_ip_string);}memcpy(&req.arp_tpa,&target_ip_addr.s_addr,sizeof(req.arp_tpa));

You will also need to set req.arp_spa and req.arp_sha (the sender protocol address and sender hardware address) to contain the IP and MAC addresses of the interface from which the request will be sent (in network byte order). See the microHOWTOs Get the IP address of a network interface in C using SIOCGIFADDR and Get the MAC address of an Ethernet interface in C using SIOCGIFHWADDR for details of how to obtain these given the interface name.

Send the frame (using sendto)

To call sendto you must supply the content of the frame and the remote address to which it should be sent:

if (sendto(fd,&req,sizeof(req),0,(struct sockaddr*)&addr,sizeof(addr))==-1) { die("%s",strerror(errno));}

The fourth argument is for specifying flags which modify the behaviour of sendto, none of which are needed in this example.

The value returned by sendto is the number of bytes sent, or -1 if there was an error. AF_PACKET frames are sent atomically, so unlike when writing to a TCP socket there is no need to wrap the function call in a loop to handle partially-sent data.

Send the frame (using sendmsg)

To call sendmsg, in addition to the datagram content and remote address you must also construct an iovec array and a msghdr structure:

struct iovec iov[1];iov[0].iov_base=&req;iov[0].iov_len=sizeof(req);

struct msghdr message;message.msg_name=&addr;message.msg_namelen=sizeof(addr);message.msg_iov=iov;message.msg_iovlen=1;message.msg_control=0;message.msg_controllen=0;

if (sendmsg(fd,&message,0)==-1) { die("%s",strerror(errno));}

The purpose of the iovec array is to provide a scatter/gather capability so that the datagram payload need not be stored in a contiguous region of memory. In this example the entire payload is stored in a single buffer, therefore only one array element is needed.

The msghdr structure exists to bring the number of arguments to recvmsg and sendmsg down to a manageable number. On entry to sendmsg it specifies where the destination address, the datagram payload and any ancillary data are stored. In this example no ancillary data has been provided.

If you wish to pass any flags into sendmsg then this cannot be done using msg_flags, which is ignored on entry. Instead you must pass them using the third argument to sendmsg (which is zero in this example).

Alternatives

Using libpcap

See: Send an arbitrary Ethernet frame using libpcap

libpcap is a cross-platform library for capturing traffic from network interfaces. It also has the ability to send, so provides broadly the same functionality as a packet socket (and on Linux, is implemented using a packet socket).

The main advantage of using libpcap is that it abstracts away differences between the operating systems that it supports, thereby allowing relatively portable code to be written. This involves some loss of functionality, and that may make libpcap unsuitable for use in some circumstances, but otherwise it is recommended in preference to AF_PACKET sockets on the grounds of portability.

Using a raw socket

See: Send an arbitrary IPv4 datagram using a raw socket in C

Raw sockets differ from packet sockets in that they operate at the network layer as opposed to the link layer. For this reason they are limited to network protocols for which raw socket support has been explicitly built into the network stack, but they also have a number of advantages which result from operating at a higher level of abstraction:

You can write code that will work with any suitable type of network interface. Routing and link-layer address resolution are handled for you. The network layer header is constructed for you unless you request otherwise. The raw socket API has been partially standardised by POSIX, whereas AF_PACKET sockets

are specific to Linux.

For these reasons, use of a raw socket is recommended unless you specifically need the extra functionality provided by working at the link layer.

Further reading

packet(7) (Linux manpage)

Send an arbitrary IPv4 datagram using a raw socket in C

Content

1 Objective
2 Background
3 Scenario
4 Method
  4.1 Overview
  4.2 Select the required protocol number
  4.3 Create the raw socket
  4.4 Optionally, set the IP_HDRINCL socket option
  4.5 Send the datagram
  4.6 Send the datagram (using sendto)
  4.7 Send the datagram (using sendmsg)
5 Variations
  5.1 Sending to the IPv4 broadcast address
6 Alternatives
  6.1 Sending at the link layer
7 See also
8 Further reading

Tested on

Debian (Lenny)

Ubuntu (Lucid)

Objective

To send an arbitrary IPv4 datagram using a raw socket in C

Background

Most programs that communicate using the Internet Protocol do so through a transport-layer protocol such as TCP or UDP and have no need to deal directly with Internet Protocol datagrams, but there are some circumstances where it is necessary to interact with the network stack at a lower level. These include:

implementation of transport-layer protocols that are not built in to the network stack, and production of malformed or otherwise non-standard datagrams for testing purposes.

Scenario

Suppose that you wish to send an ICMP echo request to a given IPv4 address. (This is what the ping command does to determine whether there is a reachable host at that address.)

There is no POSIX API call that provides this functionality per se. You therefore intend to assemble an ICMP message with the required content then send it as the payload of an IP datagram using a raw socket.

http://www.microhowto.info/howto/send_an_arbitrary_ipv4_datagram_using_a_raw_socket_in_c.html#idp86480

















Method

Overview


The method described here has five steps:

1. Select the required protocol number.
2. Create the raw socket.
3. Optionally, set the IP_HDRINCL socket option.
4. Construct the datagram.
5. Send the datagram.

The following header files will be needed:

#include <errno.h>#include <unistd.h>#include <netdb.h>#include <sys/socket.h>#include <netinet/in.h>

Note that POSIX-compatible operating systems are not obliged to support raw sockets at all, and the API that has been fully standardised is quite restrictive. For this reason it is often necessary for programs that use raw sockets to stray into the realm of implementation-defined behaviour. They are also likely to require elevated privileges in order to run.

Select the required protocol number

All IPv4 traffic is labelled with a protocol number to distinguish between the various transport-layer protocols (such as TCP and UDP) that IPv4 can carry. You will need this number:

when opening the raw socket (unless you choose IPPROTO_RAW for the protocol number on a system that interprets this as a wildcard), and/or

when constructing the IP datagram header (if you choose to do this yourself instead of allowing it to be added automatically).

There are several sources from which protocol numbers can be obtained:

Some protocol numbers are defined as constants by the API. POSIX defines IPPROTO_TCP, IPPROTO_UDP and IPPROTO_ICMP, and glibc defines many more.

Protocol numbers can be looked up at run time by calling the function getprotobyname. IANA maintains a list of assigned protocol numbers.

Unlike a TCP or UDP port number there is little risk of an assigned IP protocol number ever needing to change, especially for a widely-used protocol such as ICMP. For this reason there is no real need to look up the protocol number at runtime, and it is quite reasonable for the required value to be hard-coded.

http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xml

For this particular example there is a symbolic constant, IPPROTO_ICMP, that all POSIX-compatible operating systems are supposed to provide. The simplest solution would be to use that. If you instead want to call getprotobyname then this can be done as follows:

const char* protocol_name="icmp";struct protoent* protocol=getprotobyname(protocol_name);if (!protocol) { die("Protocol %s not found",protocol_name);}int protocol_number=protocol->p_proto;

Note that getprotobyname is not thread-safe. In a multi-threaded program it would be advisable to look up any required protocol numbers at the outset if this is practicable.

Create the raw socket

The socket that will be used to send the IP datagram should be created using the socket function. This takes three arguments:

1. the domain (AF_INET in this case, meaning IPv4),
2. the socket type (SOCK_RAW in this case, meaning that the socket should provide direct access to the network layer without any transport-layer protocol), and
3. the protocol (normally corresponding to the protocol field in the Internet Protocol header).

An alternative to specifying the protocol number as the third argument is to use the value IPPROTO_RAW. POSIX does not generally allow this, but some implementations use it as a wildcard or a dummy value. (In the case of Linux it allows any protocol to be sent (with headers) but nothing can be received.)

In this instance the socket will be used for sending ICMP messages, therefore the third argument should be set to IPPROTO_ICMP:

int fd=socket(AF_INET,SOCK_RAW,IPPROTO_ICMP);if (fd==-1) { die("%s",strerror(errno));}

Optionally, set the IP_HDRINCL socket option

POSIX does not specify the format in which a datagram should be written to a raw socket, however the following behaviour is typical:

By default the header is generated automatically, therefore only the payload should be written.

If the IP_HDRINCL socket option is set then the header should be constructed by the caller and both header and payload written to the socket.

The protocol level for IP_HDRINCL is IPPROTO_IP. The parameter is a boolean value that is usually represented by an int. It should be set to zero to disable header inclusion or non-zero to enable it:

int hdrincl=1;if (setsockopt(fd,IPPROTO_IP,IP_HDRINCL,&hdrincl,sizeof(hdrincl))==-1) { die("%s",strerror(errno));}

Support for IP_HDRINCL is quite common, but the details vary as to:

the byte order that should be used for each of the header fields (which is not necessarily the same for all fields), and

which fields (if any) are filled in automatically.

Some operating systems set IP_HDRINCL implicitly when IPPROTO_RAW is selected (on the grounds that it would make little sense not to supply a header in that case) but others require an explicit call to setsockopt. If you want to enable header inclusion then it is prudent to set it regardless, in order to accommodate either behaviour.

Send the datagram

Raw datagrams can in principle be sent using any function that is capable of writing to a file descriptor, however it is often necessary to use either sendto or sendmsg so that a destination address can be specified. There are two possible reasons for this:

If the header will be constructed automatically then the network stack needs to know what the destination address field should be set to.

You may want to route the datagram towards an address that differs from the one specified in the IP header.

Of sendto and sendmsg the latter is the more flexible option, but at the cost of a significantly more complex interface. Details for each function are given below.


In this particular example the payload to be sent is an ICMP echo request, which can be constructed as follows:

const size_t req_size=8;struct icmphdr req;req.type=8;req.code=0;req.checksum=0;req.un.echo.id=htons(rand());req.un.echo.sequence=htons(1);req.checksum=ip_checksum(&req,req_size);

This makes use of the icmphdr structure provided by glibc and the ip_checksum function described in the microHOWTO ‘Calculate an Internet Protocol checksum in C’. Note that sizeof(req) cannot be used to obtain the size of the payload because struct icmphdr is not specific to echo requests, so the constant req_size has been defined for this purpose.

Send the datagram (using sendto)

To call sendto you must supply the content of the datagram and the remote address to which it should be sent:

if (sendto(fd,&req,req_size,0, res->ai_addr,res->ai_addrlen)==-1) { die("%s",strerror(errno));}


The value returned by sendto is the number of bytes sent, or -1 if there was an error. Raw datagrams are sent atomically, so unlike when writing to a TCP socket there is no need to wrap the function call in a loop to handle partially-sent data.

Send the datagram (using sendmsg)


struct iovec iov[1];iov[0].iov_base=&req;iov[0].iov_len=req_size;

struct msghdr message;message.msg_name=res->ai_addr;message.msg_namelen=res->ai_addrlen;message.msg_iov=iov;message.msg_iovlen=1;message.msg_control=0;message.msg_controllen=0;




http://microhowto.hellmouth.org.uk/howto/calculate_an_internet_protocol_checksum_in_c.html


Variations

Sending to the IPv4 broadcast address

By default, attempts to send a datagram to the broadcast address are rejected with an error (typically EACCES, however it is not obvious from the POSIX specification which error should occur). This is a safety measure intended to reduce the risk of making unintended broadcasts. It can be overridden by setting the SO_BROADCAST socket option:

int broadcast=1;if (setsockopt(fd,SOL_SOCKET,SO_BROADCAST, &broadcast,sizeof(broadcast))==-1) { die("%s",strerror(errno));}

Alternatives

Sending at the link layer

See: Send an arbitrary Ethernet frame using libpcap

Send an arbitrary Ethernet frame using an AF_PACKET socket in C

Raw sockets of the type described above operate at the network layer. An alternative would be to inject packets at the link layer, for example in the form of Ethernet frames. This can be done using libpcap or (on Linux-based systems) using an AF_PACKET socket.

This approach makes it possible to implement any network-layer protocol, whether or not it is explicitly supported by the network stack, but also brings a number of disadvantages which result from operating at a lower level of abstraction:

The sender must construct the network layer header, and depending on the method of injection, perhaps also the link layer header.

The sender must take responsibility for routing and link-layer address resolution (although it may be possible to delegate these tasks back to the operating system rather than implementing them from scratch).

The above cannot normally be done without knowledge of the link layer protocol, which will typically need to be coded into the sending program on a case-by-case basis.


http://www.microhowto.info/howto/send_an_arbitrary_ethernet_frame_using_an_af_packet_socket_in_c.html

http://www.microhowto.info/howto/send_an_arbitrary_ethernet_frame_using_libpcap.html

See also

Send a UDP datagram in C Establish a TCP connection in C

Further reading

raw(7) (Linux manpage) The Open Group, sendto, Base Specifications Issue 6 The Open Group, sendmsg, Base Specifications Issue 6 ithilgore, SOCK_RAW Demystified, May 2008

Send an arbitrary Ethernet frame using libpcap

Content

1 Objective 2 Background 3 Scenario 4 Method o 4.1 Overview o 4.2 Select the required EtherType o 4.3 Construct the Ethernet frame

http://www.microhowto.info/howto/send_an_arbitrary_ethernet_frame_using_libpcap.html#idp29440







http://sock-raw.org/papers/sock_raw

http://pubs.opengroup.org/onlinepubs/009695399/functions/sendmsg.html

http://pubs.opengroup.org/onlinepubs/009695399/functions/sendto.html

http://www.kernel.org/doc/man-pages/online/pages/man7/raw.7.html

http://www.microhowto.info/howto/establish_a_tcp_connection_in_c.html

http://www.microhowto.info/howto/send_a_udp_datagram_in_c.html

o 4.4 Obtain a PCAP descriptor by calling pcap_open_live o 4.5 Send the Ethernet frame by calling pcap_inject o 4.6 Close the PCAP descriptor by calling pcap_close 5 Example program 6 Alternatives o 6.1 Using an AF_PACKET socket o 6.2 Using a raw socket 7 Further reading

Tested on

Debian (Lenny, Squeeze)

Ubuntu (Lucid)

Objective

To send an arbitrary Ethernet frame using libpcap

Background

Ethernet is a link layer protocol. Most networking programs interact with the network stack at the transport layer or above, so have no need to deal with Ethernet frames directly, but there are some circumstances where interaction at a lower level may be necessary. These include:

implementation of Ethernet-based protocols that are not built in to the network stack, and production of malformed or otherwise non-standard frames for testing purposes.

Scenario

Suppose that you wish to send an ARP request for a given IP address from a given Ethernet interface. You wish to use libpcap to perform the sending.

(ARP is the Address Resolution Protocol. It is used when a host needs to send a datagram to a given IP address, but does not know which MAC address corresponds to that IP address. It is described in RFC 826 .)

Method

Overview


1. Select the required EtherType.2. Construct the Ethernet frame.3. Obtain a PCAP descriptor by calling pcap_open_live.

http://www.ietf.org/rfc/rfc826.txt









4. Send the Ethernet frame by calling pcap_inject.5. Close the PCAP descriptor by calling pcap_close.


Header Used by

<stdio.h> fprintf

<stdlib.h> exit

<pcap.h> pcap_open_live, pcap_inject, pcap_close, pcap_perror

Be aware that:

Not all network devices are Ethernet interfaces, or use an Ethernet-compatible frame format, or support packet injection using libpcap.

Although a link-layer header must be supplied, libpcap does not promise to use it as-is: both the source address and the EtherType are at risk of being altered.

Programs that send raw packets, using this or any other method, are likely to require elevated privileges in order to run.

Select the required EtherType

The EtherType of an Ethernet frame specifies the type of payload that it contains. There are several sources from which EtherTypes can be obtained:

On Linux-based systems the header file <linux/if_ether.h> provides constants for most commonly-used EtherTypes. Examples include ETH_P_IP for the Internet Protocol (0x0800), ETH_P_ARP for the Address Resolution Protocol (0x0806) and ETH_P_8021Q for IEEE 802.1Q VLAN tags (0x8100).

The IEEE maintains the definitive list of registered EtherTypes. A semi-official list is maintained by IANA.

If you need an EtherType for experimental or private use then the values 0x88b5 and 0x88b6 have been reserved for that purpose.

Construct the Ethernet frame

Frames sent using libpcap must:

have a link-layer header (there is no option for this to be added automatically), and be presented to libpcap as a single, contiguous block of memory (there is no equivalent of

the scatter/gather capability provided by recvmsg and sendmsg).

http://www.iana.org/assignments/ethernet-numbers

http://standards.ieee.org/develop/regauth/ethertype/eth.txt

See the example program below for how this might be done in the specific case where you want to send an ARP request. Be aware that:

Most network protocols require that multi-byte values be converted to network byte order. Structures may have padding added by the compiler (although ones provided by system

headers ought to be safe). C and C++ place restrictions on when pointer casts can be safely used to convert data from

one type to another.

You will probably need to know the MAC address of the interface from which the packet will be sent. On Linux-based systems this can be obtained using the ioctl command SIOCGIFHWADDR. See the microHOWTO Get the MAC address of an Ethernet interface in C using SIOCGIFHWADDR for details.

As noted previously, libpcap does not guarantee that the link-layer header that is sent will be identical to the one that was provided.

Obtain a PCAP descriptor by calling pcap_open_live

To access a network interface via libpcap it is necessary to have an open packet capture descriptor. This is a pointer of type pcap_t* and can be obtained by calling pcap_open_live:

char pcap_errbuf[PCAP_ERRBUF_SIZE];pcap_errbuf[0]='\0';pcap_t* pcap=pcap_open_live(if_name,96,0,0,pcap_errbuf);if (pcap_errbuf[0]!='\0') { fprintf(stderr,"%s",pcap_errbuf);}if (!pcap) { exit(1);}

The first argument to pcap_open_live is the name of the interface from which the Ethernet frame is to be sent, for example eth0. (Remember that not all interfaces are suitable for sending Ethernet frames.)

The second, third and fourth arguments are the snapshot length, promiscuous mode flag and timeout. These control how packets are captured, and for the task in hand it is unimportant what values are used, but if you want to capture as well as send then you will need to ensure that they have been set appropriately (especially the snapshot length).

The last argument points to a buffer for returning error messages, which must be at least PCAP_ERRBUF_SIZE bytes long. As suggested on the pcap_open_live manpage, this has been set to the empty string before the function call then inspected afterwards in order to detect both warnings and errors.

http://www.microhowto.info/howto/get_the_mac_address_of_an_ethernet_interface_in_c_using_siocgifhwaddr.html

http://www.microhowto.info/howto/get_the_mac_address_of_an_ethernet_interface_in_c_using_siocgifhwaddr.html

Send the Ethernet frame by calling pcap_inject

Given a PCAP descriptor, frames can be sent by calling the function pcap_inject:

if (pcap_inject(pcap,&req,sizeof(req))==-1) { pcap_perror(pcap,0); pcap_close(pcap); exit(1);}

The value returned by pcap_inject is the number of bytes sent, or -1 if there was an error. In the latter case a human-readable error message can be obtained using pcap_geterr or (as in this example) printed using pcap_perror.

Close the PCAP descriptor by calling pcap_close

The PCAP descriptor should be closed once it is no longer needed:

// Close the capture descriptor once it is no longer needed.
pcap_close(pcap);

Example program

The following example program constructs and sends an ARP request using the method described above:

send_arp.c

It can be compiled using the command:

gcc -o send_arp send_arp.c -lpcap

When invoked it takes two arguments, the name of the Ethernet interface and the (numeric) IP address to which the ARP request should be directed:

./send_arp eth0 192.168.0.83

Alternatives

Using an AF_PACKET socket

See: Send an arbitrary Ethernet frame using an AF_PACKET socket in C

On Linux-based systems an alternative way to send an Ethernet frame is to use an AF_PACKET socket. This has some advantages over the use of libpcap:

http://www.microhowto.info/howto/send_an_arbitrary_ethernet_frame_using_libpcap/send_arp.c

http://www.microhowto.info/howto/send_an_arbitrary_ethernet_frame_using_an_af_packet_socket_in_c.html

It allows packets to be written directly to a POSIX socket descriptor, making it possible to use facilities such as scatter/gather and non-blocking output, and providing compatibility with libraries like libevent that act on file descriptors.

It offers a choice between having the link-layer header supplied by the sender or constructed by the network stack.

It removes a layer of indirection, and the need for libpcap to be present at compile time or run time.

The main drawback of AF_PACKET sockets is their lack of portability. They are specific to Linux (version 2.2 and later), and for this reason they are not recommended where the use of libpcap (or a raw socket) is a viable alternative.

Using a raw socket

See: Send an arbitrary IPv4 datagram using a raw socket in C

Raw sockets differ from packet sockets in that they operate at the network layer as opposed to the link layer. For this reason they are limited to network protocols for which raw socket support has been explicitly built into the network stack, but they also have a number of advantages which result from operating at a higher level of abstraction:

You can write code that will work with any suitable type of network interface. Routing and link-layer address resolution are handled for you. The network layer header is constructed for you unless you request otherwise. The raw socket API has been partially standardised by POSIX.


Further reading

PCAP(3) (libpcap manpage)

Full Source below for sending via libpcap:

// Purpose: to construct an ARP request and write it to an Ethernet interface// using libpcap.//// See: "Send an arbitrary Ethernet frame using libpcap"// #include <stdio.h>#include <stdlib.h>#include <string.h>#include <pcap.h>#include <arpa/inet.h>#include <net/if.h>

http://www.tcpdump.org/pcap3_man.html

http://www.microhowto.info/howto/send_an_arbitrary_ipv4_datagram_using_a_raw_socket_in_c.html

#include <net/ethernet.h>
#include <netinet/if_ether.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc,const char* argv[]) { // Get interface name and target IP address from command line. if (argc<2) { fprintf(stderr,"usage: send_arp <interface> <ipv4-address>\n"); exit(1); } const char* if_name=argv[1]; const char* target_ip_string=argv[2];

// Construct Ethernet header (except for source MAC address). // (Destination set to broadcast address, FF:FF:FF:FF:FF:FF.) struct ether_header header; header.ether_type=htons(ETH_P_ARP); memset(header.ether_dhost,0xff,sizeof(header.ether_dhost));

// Construct ARP request (except for MAC and IP addresses). struct ether_arp req; req.arp_hrd=htons(ARPHRD_ETHER); req.arp_pro=htons(ETH_P_IP); req.arp_hln=ETHER_ADDR_LEN; req.arp_pln=sizeof(in_addr_t); req.arp_op=htons(ARPOP_REQUEST); memset(&req.arp_tha,0,sizeof(req.arp_tha));

// Convert target IP address from string, copy into ARP request. struct in_addr target_ip_addr={0}; if (!inet_aton(target_ip_string,&target_ip_addr)) { fprintf(stderr,"%s is not a valid IP address",target_ip_string); exit(1); } memcpy(&req.arp_tpa,&target_ip_addr.s_addr,sizeof(req.arp_tpa));

// Write the interface name to an ifreq structure, // for obtaining the source MAC and IP addresses. struct ifreq ifr; size_t if_name_len=strlen(if_name); if (if_name_len<sizeof(ifr.ifr_name)) { memcpy(ifr.ifr_name,if_name,if_name_len); ifr.ifr_name[if_name_len]=0; } else { fprintf(stderr,"interface name is too long"); exit(1); }

// Open an IPv4-family socket for use when calling ioctl. int fd=socket(AF_INET,SOCK_DGRAM,0); if (fd==-1) { perror(0); exit(1); }

// Obtain the source IP address, copy into ARP request if (ioctl(fd,SIOCGIFADDR,&ifr)==-1) { perror(0); close(fd); exit(1); } struct sockaddr_in* source_ip_addr = (struct sockaddr_in*)&ifr.ifr_addr; memcpy(&req.arp_spa,&source_ip_addr->sin_addr.s_addr,sizeof(req.arp_spa));

// Obtain the source MAC address, copy into Ethernet header and ARP request. if (ioctl(fd,SIOCGIFHWADDR,&ifr)==-1) { perror(0); close(fd); exit(1); } if (ifr.ifr_hwaddr.sa_family!=ARPHRD_ETHER) { fprintf(stderr,"not an Ethernet interface"); close(fd); exit(1); } const unsigned char* source_mac_addr=(unsigned char*)ifr.ifr_hwaddr.sa_data; memcpy(header.ether_shost,source_mac_addr,sizeof(header.ether_shost)); memcpy(&req.arp_sha,source_mac_addr,sizeof(req.arp_sha)); close(fd);

// Combine the Ethernet header and ARP request into a contiguous block. unsigned char frame[sizeof(struct ether_header)+sizeof(struct ether_arp)]; memcpy(frame,&header,sizeof(struct ether_header)); memcpy(frame+sizeof(struct ether_header),&req,sizeof(struct ether_arp));

// Open a PCAP packet capture descriptor for the specified interface. char pcap_errbuf[PCAP_ERRBUF_SIZE]; pcap_errbuf[0]='\0'; pcap_t* pcap=pcap_open_live(if_name,96,0,0,pcap_errbuf); if (pcap_errbuf[0]!='\0') { fprintf(stderr,"%s\n",pcap_errbuf); } if (!pcap) { exit(1); }

// Write the Ethernet frame to the interface. if (pcap_inject(pcap,frame,sizeof(frame))==-1) { pcap_perror(pcap,0); pcap_close(pcap); exit(1); }

// Close the PCAP descriptor. pcap_close(pcap); return 0;}

Get the MAC address of an Ethernet interface in C using SIOCGIFHWADDR

Content

1 Objective 2 Scenario 3 Method o 3.1 Overview

http://www.microhowto.info/howto/get_the_mac_address_of_an_ethernet_interface_in_c_using_siocgifhwaddr.html#idp8608




o 3.2 Create an ifreq structure for passing data in and out of ioctl

o 3.3 Provide an open socket descriptor o 3.4 Invoke ioctl o 3.5 Check the type of the returned hardware address o 3.6 Extract the hardware address from the ifreq structure 4 See also 5 Further reading

Tested on


Ubuntu (Lucid, Precise)

Objective

To get the MAC address of an Ethernet interface in C using the ioctl command SIOCGIFHWADDR

Scenario

Suppose you wish to display the MAC address of an Ethernet interface. The variable if_name points to a null-terminated string containing the name of the interface (for example, eth0).

Method

Overview

On Linux-based systems the MAC address of an interface can be obtained using the ioctl command SIOCGIFHWADDR. The method described here has five steps:

1. Create an ifreq structure for passing data in and out of ioctl.2. Provide an open socket descriptor.3. Invoke ioctl.4. Check the type of the returned hardware address.5. Extract the hardware address from the ifreq structure.


#include <errno.h>#include <string.h>#include <stdio.h>#include <sys/ioctl.h>#include <net/if.h>#include <net/if_arp.h>









Create an ifreq structure for passing data in and out of ioctl

The ifreq structure should initially contain the name of the interface to be queried, which should be copied into the ifr_name field. Since this is a fixed-length buffer you should take care to ensure that the name does not cause an overrun:

struct ifreq ifr;size_t if_name_len=strlen(if_name);if (if_name_len<sizeof(ifr.ifr_name)) { memcpy(ifr.ifr_name,if_name,if_name_len); ifr.ifr_name[if_name_len]=0;} else { die("interface name is too long");}

Provide an open socket descriptor

The socket descriptor is merely an artefact of the way in which ioctl commands are invoked generally, and is not used for any particular purpose by SIOCGIFHWADDR. It must be open and must refer to a socket (as opposed to, for example, a regular file). Any type of socket would suffice, but it should preferably not be one that requires any obscure kernel modules to be loaded. For this example a UNIX domain socket will be used:

int fd=socket(AF_UNIX,SOCK_DGRAM,0);if (fd==-1) { die("%s",strerror(errno));}

Invoke ioctl

Once you have the ifreq structure and socket descriptor then you are ready to invoke ioctl:

if (ioctl(fd,SIOCGIFHWADDR,&ifr)==-1) { int temp_errno=errno; close(fd); die("%s",strerror(temp_errno));}close(fd);

If this completes without error then the hardware address of the interface should have been returned in ifr.ifr_hwaddr in the form of a struct sockaddr.

Check the type of the returned hardware address

The length and format of the hardware address will depend on the type of network interface it belongs to, so you should not assume that it is an Ethernet MAC address. You can check the address type by inspecting the sa_family field of the sockaddr. For an Ethernet interface this should be equal to ARPHRD_ETHER:

if (ifr.ifr_hwaddr.sa_family!=ARPHRD_ETHER) { die("not an Ethernet interface");}

Other possible values of sa_family for different types of network interface can be found in the header file <net/if_arp.h>, each beginning with the prefix ARPHRD_. Note that for some of these (such as ARPHRD_LOOPBACK) there is no hardware address as such.

Extract the hardware address from the ifreq structure

Having checked its type, the address can now be safely extracted from ifr.ifr_hwaddr.sa_data. It is represented as an array of char, which could be a signed type, so if you wish to interpret it in any way then it should first be converted to an unsigned representation. A crude but straightforward way to achieve this is to cast the whole array to an unsigned char*:

const unsigned char* mac=(unsigned char*)ifr.ifr_hwaddr.sa_data;printf("%02X:%02X:%02X:%02X:%02X:%02X\n", mac[0],mac[1],mac[2],mac[3],mac[4],mac[5]);

See also

Get the IP address of a network interface in C using SIOCGIFADDR

Further reading

netdevice(7) (Linux manpage)


http://www.kernel.org/doc/man-pages/online/pages/man7/netdevice.7.html

http://www.microhowto.info/howto/get_the_ip_address_of_a_network_interface_in_c_using_siocgifaddr.html

Content

1 Objective 2 Scenario 3 Method o 3.1 Overview o 3.2 Create an ifreq structure for passing data in and out of ioctl o 3.3 Provide an open socket descriptor with the address family

AF_INETo 3.4 Invoke ioctl o 3.5 Extract the IP address from the ifreq structure 4 See also 5 Further reading

Tested on

Debian (Lenny)

Ubuntu (Precise, Trusty)

Objective

To get the IPv4 address of a network interface in C using the ioctl command SIOCGIFADDR

Scenario

Suppose that you wish to display the IPv4 address of a network interface. The variable if_name points to a null-terminated string containing the name of the interface (for example, eth0).

Method

Overview

On Linux-based systems, one way to obtain the IPv4 address of an interface is to use the ioctl command SIOCGIFADDR. The method described here has four steps:

1. Create an ifreq structure for passing data in and out of ioctl.2. Provide an open socket descriptor with the address family AF_INET.3. Invoke ioctl.4. Extract the IP address from the ifreq structure.

The following header files are needed when using this method:

#include <sys/ioctl.h>#include <net/if.h>#include <netinet/in.h>

http://www.microhowto.info/howto/get_the_ip_address_of_a_network_interface_in_c_using_siocgifaddr.html#idp39664











In addition, this particular implementation makes use of:

#include <errno.h>#include <string.h>#include <stdio.h>#include <arpa/inet.h>

Please note that whilst this method can be used with some network protocols other than IPv4, the Linux implementation does not support IPv6. Furthermore it is only able to return a single result for any given network protocol, so will only return one of the addresses of an interface that has several. It is not necessarily portable to other POSIX-compatible systems, and is no longer the preferred method on Linux.




Provide an open socket descriptor with the address family AF_INET

All ioctl calls need a file descriptor to act on. In the case of SIOCGIFADDR this must refer to a socket (as opposed to, for example, a regular file) and must be of the address family that you wish to obtain (AF_INET in this instance). Otherwise any type of socket would suffice, but it should preferably not be one that requires any obscure kernel modules to be loaded. For this example a UDP socket will be used:

int fd=socket(AF_INET,SOCK_DGRAM,0);if (fd==-1) { die("%s",strerror(errno));}

Invoke ioctl


if (ioctl(fd,SIOCGIFADDR,&ifr)==-1) { int temp_errno=errno; close(fd);

die("%s",strerror(temp_errno));}close(fd);

If this completes without error then the IPv4 address of the interface should have been returned in ifr.ifr_addr in the form of a struct sockaddr_in.

Extract the IP address from the ifreq structure

If an address was returned at all then it ought to be an IPv4 address, because that was the address family of the socket. To obtain the numerical value of the address you should:

1. Cast the returned address to a struct sockaddr_in.
2. Extract the sin_addr field of this structure to obtain a struct in_addr.
3. Extract the s_addr field of the in_addr structure to obtain an in_addr_t (equivalent to a uint32_t).
4. Finally, convert the s_addr field (which is in network byte order) into whatever representation you require.

struct sockaddr_in* ipaddr = (struct sockaddr_in*)&ifr.ifr_addr;printf("IP address: %s\n",inet_ntoa(ipaddr->sin_addr));

See also


Further reading

netdevice(7) , Linux manpage

(Note that SIOCGIFADDR was not documented in netdevice(7) until version 3.40 of the Linux man-pages project, which was released in April 2012, so at the time of writing it had not been incorporated into the stable releases of most GNU/Linux distributions. The ioctl itself has been present in Linux since 1993.)

http://man7.org/linux/man-pages/man7/netdevice.7.html

http://www.microhowto.info/howto/get_the_ip_address_of_a_network_interface_in_c_using_siocgifaddr.html

Get the index number of a Linux network interface in C using SIOCGIFINDEX

Content

1 Objective 2 Background 3 Scenario 4 Method o 4.1 Overview o 4.2 Create an ifreq structure for passing data in and out of

ioctlo 4.3 Provide an open socket descriptor o 4.4 Invoke ioctl 5 Further reading

Tested on


Ubuntu (Lucid, Precise, Trusty)

Objective

To get the index number of a Linux network interface in C using the ioctl command SIOCGIFINDEX

Background

Network interfaces are usually identified by name in user-facing contexts, but for some APIs a number is used instead. A notable example is the sin6_scope_id field of an IPv6 socket address with link scope. Indices are also used in some types of netlink message (particularly those concerned with routing) and in socket addresses for AF_PACKET sockets.

The interface index is typically not the same as the suffix which may form part of the interface name. For example, on one of the machines tested by the author, eth0 had an index of 2. You should not assume that they will be the same on other machines, or that they will necessarily remain the same following a reboot.

Scenario

Suppose you wish to send a raw Ethernet frame using an AF_PACKET socket. To do this you need to know the index number of the network interface from which the frame is to be sent.

The variable if_name points to a null-terminated string containing the name of the interface.

http://www.microhowto.info/howto/get_the_index_number_of_a_linux_network_interface_in_c_using_siocgifindex#idp31536










Method

Overview

On Linux-based systems the index number of a network interface can be obtained using the ioctl command SIOCGIFINDEX. The method described here has three steps:

1. Create an ifreq structure for passing data in and out of ioctl.2. Provide an open socket descriptor.3. Invoke ioctl.


#include <errno.h>#include <string.h>#include <sys/ioctl.h>#include <net/if.h>




Provide an open socket descriptor

The socket descriptor is merely an artefact of the way in which ioctl commands are invoked generally, and is not used for any particular purpose by SIOCGIFINDEX. It must be open and must refer to a socket (as opposed to, for example, a regular file).

In many of the circumstances where you would use SIOCGIFINDEX there will already be an open socket that you can use. For example, in the particular scenario described above you could open the AF_PACKET socket first and use that. Otherwise, you will need to open one specifically for the purpose of being an argument to ioctl. Any type of socket would suffice, but it should preferably not be one that requires any obscure kernel modules to be loaded:

int fd=socket(AF_UNIX,SOCK_DGRAM,0);if (fd==-1) { die("%s",strerror(errno));

}

Invoke ioctl


if (ioctl(fd,SIOCGIFINDEX,&ifr)==-1) { die("%s",strerror(errno));}

If this completes without error then the interface index should have been returned in ifr.ifr_ifindex.

Further reading

netdevice(7) (Linux manpage)

Cause a process to become a daemon in C

Content

1 Objective 2 Background and Scenario 3 Method o 3.1 Fork, allowing the parent process to terminate o 3.2 Start a new session for the daemon by calling setsid o 3.3 Fork again, allowing the parent process to terminate o 3.4 Change the current working directory to a safe location o 3.5 Set the umask to zero o 3.6 Close then reopen stdin, stdout and stderr o 3.7 The complete method as a function 4 Testing 5 Variations o 5.1 Redirect stdout and stderr to a logfile o 5.2 Using SIGHUP for other purposes 6 Methods to avoid o 6.1 Use the daemon function

Tested on

Debian (Etch, Lenny, Squeeze)

Fedora (14)

Ubuntu (Hardy, Intrepid, Jaunty,

http://www.microhowto.info/howto/cause_a_process_to_become_a_daemon_in_c.html#idp54624
















http://www.kernel.org/doc/man-pages/online/pages/man7/netdevice.7.html

Karmic, Lucid, Maverick, Natty, Precise, Trusty)

Objective

To cause a process to become a daemon in C

Background and Scenario

See Cause a process to become a daemon. That page also gives a more detailed rationale for the method, which is explained only in outline here.

A mechanism is needed for handling errors. The example code shown below assumes that there is a function called die provided for this purpose, which takes the same arguments as printf and does not return.

Method

Fork, allowing the parent process to terminate

Calling fork has three possible types of return value:

-1 indicates failure (most likely due to lack of memory, although it is possible to run out of other resources such as PIDs).

0 indicates that the child is running, in which case execution should continue with the next step of the daemonisation process.

Any other value indicates that the parent is running, in which case the process should terminate by calling _exit.

pid_t pid = fork();if (pid == -1) { die("failed to fork while daemonising (errno=%d)",errno);} else if (pid != 0) { _exit(0);}

Start a new session for the daemon by calling setsid

This operation should never fail, because the current process should not now be a process group leader, however we check anyhow as a precaution:

if (setsid()==-1) { die("failed to become a session leader while daemonising(errno=%d)",errno);}

http://www.microhowto.info/howto/cause_a_process_to_become_a_daemon.html

Fork again, allowing the parent process to terminate

This is a repeat of the first step, except that a handler must be installed for SIGHUP:

signal(SIGHUP,SIG_IGN);pid=fork();if (pid == -1) { die("failed to fork while daemonising (errno=%d)",errno);} else if (pid != 0) { _exit(0);}

The SIGHUP handler must remain in place until it has absorbed the SIGHUP that the parent is expected to send when it terminates. See below if you wish to install a SIGHUP handler for other purposes.

Change the current working directory to a safe location

The root directory is used here, as it is always a safe location and can be changed later if required:

if (chdir("/") == -1) { die("failed to change working directory while daemonising (errno=%d)",errno);}

Set the umask to zero

Daemons normally operate with a umask of zero. Again, this can be changed later if required:

umask(0);

Close then reopen stdin, stdout and stderr

The POSIX specification requires that /dev/null be provided, therefore the daemon can reasonably depend on this device being available provided that they fail gracefully if it is not.

When stderr is opened it must be both readable and writable. It is sufficient for stdin to be readable and stdout to be writable. If stdout or stderr refer to a regular file then they should be configured to append to it (by means of the O_APPEND flag). Because the open function always chooses the lowest unused file descriptor, by reopening the streams in ascending order it is possible to avoid the use of dup2:

close(STDIN_FILENO);close(STDOUT_FILENO);close(STDERR_FILENO);if (open("/dev/null",O_RDONLY) == -1) {

die("failed to reopen stdin while daemonising (errno=%d)",errno);}if (open("/dev/null",O_WRONLY) == -1) {

die("failed to reopen stdout while daemonising (errno=%d)",errno);

}if (open("/dev/null",O_RDWR) == -1) {

die("failed to reopen stderr while daemonising (errno=%d)",errno);}

See below if you want to direct stdout and stderr to a logfile.

The complete method as a function

#include <errno.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

/*
 * Detach the calling process from its controlling terminal and turn it
 * into a daemon.
 *
 * Steps, in order: fork and let the parent exit, start a new session,
 * fork again (ignoring SIGHUP) and let the parent exit, change directory
 * to "/", clear the umask, then close stdin/stdout/stderr and reopen all
 * three on /dev/null.
 *
 * Errors are reported through die(), which is assumed to take
 * printf-style arguments and not to return. Takes no arguments and
 * returns nothing; on return the caller is running as the daemon.
 */
void daemonise(void) {
    // Fork, allowing the parent process to terminate.
    pid_t pid = fork();
    if (pid == -1) {
        die("failed to fork while daemonising (errno=%d)",errno);
    } else if (pid != 0) {
        _exit(0);
    }

    // Start a new session for the daemon.
    if (setsid()==-1) {
        die("failed to become a session leader while daemonising(errno=%d)",errno);
    }

    // Fork again, allowing the parent process to terminate.
    // SIGHUP is ignored first because the exiting parent is expected to
    // cause one to be sent.
    signal(SIGHUP,SIG_IGN);
    pid=fork();
    if (pid == -1) {
        die("failed to fork while daemonising (errno=%d)",errno);
    } else if (pid != 0) {
        _exit(0);
    }

    // Set the current working directory to the root directory.
    if (chdir("/") == -1) {
        die("failed to change working directory while daemonising (errno=%d)",errno);
    }

    // Set the user file creation mask to zero.
    umask(0);

    // Close then reopen standard file descriptors. open() returns the
    // lowest unused descriptor, so reopening in ascending order lands on
    // 0, 1 and 2 without needing dup2.
    close(STDIN_FILENO);
    close(STDOUT_FILENO);
    close(STDERR_FILENO);
    if (open("/dev/null",O_RDONLY) == -1) {
        die("failed to reopen stdin while daemonising (errno=%d)",errno);
    }
    if (open("/dev/null",O_WRONLY) == -1) {
        die("failed to reopen stdout while daemonising (errno=%d)",errno);
    }
    if (open("/dev/null",O_RDWR) == -1) {
        die("failed to reopen stderr while daemonising (errno=%d)",errno);
    }
}

Testing

See Cause a process to become a daemon.

http://www.microhowto.info/howto/cause_a_process_to_become_a_daemon.html

Variations

Redirect stdout and stderr to a logfile

When directing output to a logfile, it is best to open the file before closing stderr to ensure that the daemon is not left with no means of reporting errors:

close(STDIN_FILENO);if (open("/dev/null",O_RDONLY) == -1) { die("failed to reopen stdin while daemonising (errno=%d)",errno);}int logfile_fileno = open(logfile_pathname,O_RDWR|O_CREAT|O_APPEND,S_IRUSR|S_IWUSR|S_IRGRP);if (logfile_fileno == -1) { die("failed to open logfile (errno=%d)",errno);}dup2(logfile_fileno,STDOUT_FILENO);dup2(logfile_fileno,STDERR_FILENO);close(logfile_fileno);

Note that dup2 will close the target file descriptor if necessary, so there is no need to do this explicitly.

Using SIGHUP for other purposes

Daemons often interpret SIGHUP as a request to reread the configuration file. A signal handler must be installed to perform this function, however it must not become fully active until after the parent process of the second fork operation has terminated (as that event will generate a SIGHUP).

One solution is to use a flag within the handler function to treat the first call differently:

/*
 * SIGHUP handler that discards the first signal it receives and acts
 * only on later ones.
 *
 * The first SIGHUP is the one generated when the parent of the second
 * fork terminates during daemonisation, so it must not be treated as a
 * request from the administrator.
 *
 * signum: the signal number delivered (unused).
 */
void handle_sighup(int signum) {
    // volatile sig_atomic_t is the object type the C standard guarantees
    // can be read and written safely from within a signal handler.
    static volatile sig_atomic_t first = 1;
    (void)signum;
    if (first) {
        first = 0;
        return;
    }
    // Insert remainder of handler here.
}

When installing the signal handler, it is better to use sigaction in preference to the signal function because that allows the SA_RESTART flag to be used. Without this, it is necessary to place a loop around any system function that is capable of returning EINTR:

struct sigaction sa;sa.sa_handler = handle_sighup;sigemptyset(&sa.sa_mask);sa.sa_flags = SA_RESTART;if (sigaction(SIGHUP,&sa,0) == -1) { die("failed to install SIGHUP handler (errno=%d)",errno);}

Methods to avoid

Use the daemon function

Many POSIX-based operating systems provide a function called daemon which performs some or all of the steps listed above. Unfortunately it has three significant drawbacks:

It is not available on all systems. Its behaviour is not standardised (or necessarily well-documented). Its behaviour is more difficult to customise.

For these reasons, any benefit gained by using the daemon function is likely to be a short-term one at best.

Tags: c | posix | process

Pad an integer with leading zeros in C++

Content

1 Objective 2 Scenario 3 Method 4 Alternative o 4.1 Using sprintf

Tested on

Ubuntu (Lucid, Precise)

Objective

To pad an integer with leading zeros to a given minimum width when converting it to a character string in C++

Scenario

Suppose you are writing a program for generating customer invoices. Each customer has an account number. These are represented internally as integers, but when converted to character strings for display or printing you want them to be padded to 8 digits using leading zeros.

http://www.microhowto.info/howto/pad_an_integer_with_leading_zeros_in_c++.html#idp21968





http://www.microhowto.info/tags/process.html

http://www.microhowto.info/tags/posix.html

http://www.microhowto.info/tags/c.html

Method

The method described here uses the C++ iostream library to perform the conversion. It requires an output stream for the result to be sent to, however a std::ostringstream can be used to capture the character sequence and present it as a std::string if required. Padding with zeros is achieved by combining the effect of three standard manipulators:

std::setw, to specify the required width in characters of the next field written to the stream,

std::setfill, to specify the character used for padding if the required width of a field is greater than its natural width, and

std::internal, to arrange for padding to occur after the sign but before the remainder of the number.

The required header files are:

Header Used by

<ios> std::internal

<iomanip> std::setw, std::setfill

<sstream> std::ostringstream

If you are using a std::ostringstream that will be discarded immediately after the conversion then simply write the three manipulators to the stream (in any order) followed by the value to be converted:

// Convert an account number to a string padded with leading zeros to a
// minimum width of 8 characters (the sign, if any, counts towards the
// width and is placed before the padding). Wider values are not truncated.
std::string format_account_number(int acct_no) {
    std::ostringstream out;
    out << std::internal << std::setfill('0') << std::setw(8) << acct_no;
    return out.str();
}

If the stream will be used subsequently for other purposes then you will probably want to reset the fill character and field adjustment properties, otherwise they will remain in effect for later output. It is not necessary to do this for the field width, which is automatically reset to zero after each field is written:

// Write an account number to `out`, padded with leading zeros to a minimum
// width of 8 characters (sign first, then padding).
//
// The stream's fill character and format flags are restored to whatever
// they were on entry, so later output on the same stream is unaffected.
// The field width needs no restoring: it resets to zero after each field.
void write_account_number(std::ostream& out, int acct_no) {
    const std::ios_base::fmtflags saved_flags = out.flags();
    const char saved_fill = out.fill();
    out << std::internal << std::setfill('0') << std::setw(8) << acct_no;
    out.fill(saved_fill);
    out.flags(saved_flags);
}

The std::internal manipulator can be omitted if the number is unsigned or known to be non-negative, but it is needed in the general case because otherwise the padding characters will

be inserted at the far left of the field by default (producing output such as 000000-1 as opposed to -0000001).

Be aware that std::setw controls the total width of the field (including the sign if there is one), with the consequence that negative values will by default be one digit shorter than non-negative values. If this is a problem then std::showpos can be used to ensure that there is always a sign (plus or minus), in which case the number of digits remains constant.

Floating point values can be padded in a similar manner. The same applies to character strings, except that std::internal would be ineffective.

Alternative

Using sprintf

A similar effect can be achieved using std::snprintf from <cstdio>:

// Convert an account number to a string padded with leading zeros to a
// minimum width of 8 characters, using std::snprintf.
std::string format_account_number(int acct_no) {
    // Sized for any int: 3 decimal digits per byte is an over-estimate,
    // plus room for the sign and the terminating NUL. A 9-byte buffer would
    // silently drop digits from values wider than 8 characters.
    char buffer[sizeof(int) * 3 + 3];
    std::snprintf(buffer, sizeof(buffer), "%08d", acct_no);
    return buffer;
}

For a typical implementation of the standard library this method is likely to be significantly faster than using a std::ostringstream (and would be faster still if std::string were avoided too). The cost is that buffer management and type safety become your responsibility, with undefined behaviour the likely consequence if you make a mistake.

A minor difference is that std::snprintf will truncate to whatever buffer length you have chosen, whereas std::ostringstream will not.

Build a shared library using GCC

Content

1 Objective 2 Background 3 Scenario 4 Method o 4.1 Overview o 4.2 Choose an soname (if required) o 4.3 Compile the source code using the -fPIC option o 4.4 Link the object code using the -fPIC and -shared options 5 Testing 6 Alternatives o 6.1 Using GNU Libtool 7 Further reading

http://www.microhowto.info/howto/build_a_shared_library_using_gcc.html#idp53776












Tested on


Ubuntu (Hardy, Intrepid, Jaunty, Karmic, Lucid, Maverick, Natty,

Oneiric, Precise, Quantal)

Objective

To build a shared library using GCC

Background

Programs can be linked against libraries either at compile time or at run time. An advantage of linking at run time is that a single copy of the library can be shared between many programs, both on disc and in memory. Libraries suitable for use in this way are known as shared libraries.

On modern Linux-based systems, shared libraries differ from static ones in the following respects:

they are ELF files (as opposed to archives compatible with the ar program), they have a dynamic symbol table (in addition to a static table), and the code within them must be position-independent.

For these reasons, some adjustments to the build process are needed to create a shared library instead of a static one.

Scenario

Suppose that you are building a library named libqux which is written in C. There are three source files: foo.c, bar.c and baz.c.

The current version number of libqux is 1.5.0. It is fully backward-compatible with the previous version, 1.4.1, which had an soname of libqux.so.1.

Method

Overview

The method described here has three steps:

1. Choose an soname (if required).2. Compile the source code using the -fPIC option.3. Link the object code using the -fPIC and -shared options.

Choose an soname (if required)

An soname (‘shared object name’) is a label used when declaring shared library dependencies. Each executable contains a list of shared libraries that it needs in order to execute. Shared libraries can similarly declare dependencies on other shared libraries. This can be done using pathnames, but if the required library has an soname then that will be used in preference.

Typically the pathname of a library will change whenever a new version is installed, whereas the soname should change only when the new version is incompatible with its predecessors to the extent that it cannot be used in their place. It follows that when dependencies are declared using sonames, the library used at runtime need not be an exact match for the one present at build time:

For a given library with a given soname, only the most recent version need be installed. Where there is a need for two versions of the same library to be installed alongside each

other, they can be distinguished because they have different sonames.

It is the degree of binary compatibility which determines whether the soname should change. For example, new functions can be added without breaking backward compatibility, but you cannot normally change the prototype of an existing function, nor do anything that could change the layout of a data structure. You should also consider changes made to the high-level behaviour of the library, as these can have an equally significant effect on backwards compatibility.

In this particular instance, version 1.4.1 of libqux had an soname of libqux.so.1. Since version 1.5.0 is backwards-compatible it can use the same soname. If this had not been the case then the soname would have needed to change, most likely to libqux.so.2.

Compile the source code using the -fPIC option

Object code intended for use in a shared library must be ‘position-independent’, meaning that it can execute without first being modified to account for where it has been loaded in memory. It remains necessary to allow for the location of other libraries, but any internal references are required to be position-independent.

GCC can be instructed to generate position independent code using the -fPIC option:

# Compile each source file to position-independent object code.
gcc -c -fPIC -o foo.o foo.c
gcc -c -fPIC -o bar.o bar.c
gcc -c -fPIC -o baz.o baz.c

This option is not enabled by default because it tends to cause some loss of performance, and for purposes other than building shared libraries it is often not necessary.

Link the object code using the -fPIC and -shared options

The default behaviour of the gcc and g++ commands when linking is to produce an executable program. They can be instructed to produce a shared library instead by means of -shared option:

gcc -shared -fPIC -Wl,-soname,libqux.so.1 -o libqux.so.1.5.0 foo.o bar.o baz.o -lc

The -fPIC option is needed when linking as it was when compiling to ensure that any code added by the linker is compatible with code previously generated by the compiler.

The -Wl option passes a comma-separated list of arguments to the linker. As its name suggests, -soname specifies the required soname. If these options are omitted then the library will not have an soname.

The ldconfig manpage recommends explicitly linking against libc, which has been done above using the -l option (-lc).

Testing

One way to test the library is to install it in a directory on the library search path. /usr/local/lib is usually the most appropriate choice. You will need to create softlinks corresponding to the soname of the library, and the name used to refer to the library when building the executable, if these are different from the filename:

# Softlink matching the soname of the library.
ln -s libqux.so.1.5.0 libqux.so.1
# Softlink matching the name used when building against the library (-lqux).
ln -s libqux.so.1.5.0 libqux.so

A partial alternative is to run ldconfig, which automatically creates the first of the above softlinks but not the second. However you do it, this method of testing normally requires administrative privileges. Once installed, it should be possible to link against the library using -l:

gcc main.c -lqux

If you cannot or do not want to move the library to /usr/local/lib then it is possible to link against the library in situ. At build time this can be done by listing the pathname of the library as an argument to gcc without use of the -l option:

gcc main.c libqux.so.1.5.0

At load time you will need to add the relevant directory to the library search path. This can be done by setting the environment variable LD_LIBRARY_PATH, for example:

export LD_LIBRARY_PATH=`pwd`

As above, you will need to create a softlink corresponding to the soname of the library. If there is a need to search multiple directories then they should be specified as a colon-separated list in LD_LIBRARY_PATH.

Alternatives

Using GNU Libtool

Libtool is part of GNU Autotools. Its purpose is to simplify the process of building shared libraries, particularly those intended for use on multiple platforms. For example, for the scenario described above you could use the following sequence of commands:

# Compile each source file through Libtool.
libtool --mode=compile gcc -c foo.c
libtool --mode=compile gcc -c bar.c
libtool --mode=compile gcc -c baz.c
# Link the resulting .lo objects into the Libtool library libqux.la.
libtool --mode=link gcc -o libqux.la foo.lo bar.lo baz.lo -rpath /usr/local/lib -version-info 6:0:5

You may not need to run these commands explicitly, because Libtool is often used in conjunction with Automake which has the ability to generate them automatically, but it is equally suitable for use as a stand-alone utility if that suits your purpose.

Be aware that Libtool requires the use of a specific numbering scheme for specifying the interface version (passed using the -version-info option above), and that this should almost certainly not be equal to the release version. The Libtool manual describes when and how these values should be changed.

Further reading

Program Library HOWTO , David A Wheeler Libtool’s versioning system , GNU Libtool Manual, GNU Project Vaughan et al, Library Versioning, GNU Autoconf, Automake and Libtool ldconfig(8) (Ubuntu manpage)

Capture the output of a child process in C

Content

1 Objective 2 Scenario 3 Method o 3.1 Overview o 3.2 Create a new pipe using the pipe function o 3.3 Connect the entrance of the pipe to STDOUT_FILENO within the child

processo 3.4 Close the entrance of the pipe within the parent process o 3.5 Close the exit from the pipe within the child process o 3.6 Sample code 4 Alternatives

http://www.microhowto.info/howto/capture_the_output_of_a_child_process_in_c.html#idp45568











http://manpages.ubuntu.com/manpages/precise/man8/ldconfig.8.html

http://www.sourceware.org/autobook/autobook/autobook_91.html

http://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html

http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html

http://www.gnu.org/software/libtool/

o 4.1 Using O_CLOEXEC to close file descriptors o 4.2 Using popen 5 See also 6 Further reading

Tested on




Objective

To capture the standard output of a child process in C

Scenario

Suppose that you are writing a program which executes a command as a child process using fork and exec:

pid_t pid = fork();if (pid == -1) { perror("fork"); exit(1);} else if (pid == 0) { execl(cmdpath, cmdname, (char*)0); perror("execl"); _exit(1);}

The command is expected to write some text to stdout and you wish to capture this output for use by the parent process.

Method

Overview

The method described here has four steps:

1. Create a new pipe using the pipe function.2. Connect the entrance of the pipe to STDOUT_FILENO within the child process.3. Close the entrance of the pipe within the parent process.4. Close the exit from the pipe within the child process.

The parent process will then be able to read the output of the child process from the exit of the pipe.






Header Used by

<errno.h> errno, EINTR

<stdio.h> perror

<stdlib.h> exit

<unistd.h> _exit, close, dup2, execl, fork, pipe, STDOUT_FILENO

<sys/wait.h> wait, pid_t

Create a new pipe using the pipe function

A pipe is an anonymous first-in, first-out (FIFO) buffer with endpoints presented as file descriptors. Because these can be owned by different processes, it provides a convenient means for transporting the output of the child process to the parent process:

int filedes[2];if (pipe(filedes) == -1) { perror("pipe"); exit(1);}

The file descriptor for the entrance to the pipe is written to filedes[1] and the exit to filedes[0]. The former must be transferred to the child process, the latter retained by the parent process. The simplest way to arrange this is to create the pipe before the child process is forked (thus ensuring that each process receives a copy of both descriptors).

Connect the entrance of the pipe to STDOUT_FILENO within the child process

When a process forks, the child inherits a set of file descriptors that are copies of those owned by the parent process. Consequently, if the standard output of the parent process is routed to a particular terminal device then the same will be true of the child process (in the first instance).

To capture the output of the child process, its standard output must instead be routed into the pipe. This can be arranged using the dup2 command:

while ((dup2(filedes[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {}

The effect is to close the file descriptor STDOUT_FILENO if it was previously open, then (re)open it as a copy of filedes[1]. A loop is needed to allow for the possibility of dup2 being interrupted by a signal. Once this has been done, filedes[1] can be closed:

close(filedes[1]);

It would be equally acceptable to copy the descriptor onto STDERR_FILENO in order to capture the standard error stream. To capture both stdout and stderr you can either create two separate pipes, or if it is acceptable for the streams to be mixed, copy the same file descriptor onto both STDOUT_FILENO and STDERR_FILENO by calling dup2 twice.

Close the entrance of the pipe within the parent process

The parent process has no need to access the entrance to the pipe, so filedes[1] should be closed within that process too:

close(filedes[1]);

Close the exit from the pipe within the child process

Similarly, the child process has no need to access the exit from the pipe:

close(filedes[0]);

(You should also have made arrangements to close any other file descriptors not needed by the child process, regardless of whether you want to capture its output.)

Sample code

The code for managing the pipe can be integrated into the existing program as follows:

int filedes[2];if (pipe(filedes) == -1) { perror("pipe"); exit(1);}

pid_t pid = fork();if (pid == -1) { perror("fork"); exit(1);} else if (pid == 0) { while ((dup2(filedes[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {} close(filedes[1]); close(filedes[0]); execl(cmdpath, cmdname, (char*)0); perror("execl"); _exit(1);}close(filedes[1]);

It is then possible for the parent process to read the output of the child process from file descriptor filedes[0]:

char buffer[4096];while (1) { ssize_t count = read(filedes[0], buffer, sizeof(buffer)); if (count == -1) { if (errno == EINTR) { continue; } else { perror("read"); exit(1); } } else if (count == 0) { break; } else { handle_child_process_output(buffer, count); }}close(filedes[0]);wait(0);

If you need to avoid blocking while waiting for output from the child then this can be arranged using select, O_NONBLOCK or similar.

Alternatives

Using O_CLOEXEC to close file descriptors

If you want to capture its output then it is quite likely that (as in this example) the child process will be calling a function from the exec family to transfer control to another program. An alternative method is then available for closing the pipe exit within the child process, by setting the O_CLOEXEC flag:

if (fcntl(filedes[0], F_SETFD, FD_CLOEXEC) == -1) { perror("fcntl"); exit(1);}

This should be done in the parent process prior to forking. It avoids the need to take any explicit action within the child process to close the file descriptor, provided that exec is called. This makes little difference if there is only one file descriptor to close, but when there are many child processes executing in parallel the benefits are more noticeable: one system call is needed instead of many, and because the flag can be set immediately when the pipe is created there is less risk of file descriptors being missed.

Using popen

The popen function provides most of the functionality described above in the form of a single function call:

FILE* fp = popen("pwd", "r");// ...int status = pclose(fp);

This is undeniably simpler than constructing the pipework explicitly, but popen can also be quite limiting:

It returns a stdio stream as opposed to a raw file descriptor, which is unsuitable for handling the output asynchronously.

Rather than executing the command directly, popen typically spawns an instance of the shell first. This can adversely affect performance, and may have other undesirable side effects.

It is possible to attach to the standard output of the child process or the standard input, but not both at the same time.

It does not provide access to the process ID of the child process. There is no opportunity to modify the context of the child process before exec is called.

Workarounds are possible for some of these issues, but in the author’s experience it is generally better to accept the minor inconvenience of calling pipe, fork and exec explicitly rather than attempting a popen-based solution and taking the risk of it later needing to be rewritten.

See also

Reap zombie processes using a SIGCHLD handler

Further reading

pipe , Base Specifications Issue 7, The Open Group, 2008 dup , Base Specifications Issue 7, The Open Group, 2008

Tags: c | posix | process

http://www.microhowto.info/tags/process.html

http://www.microhowto.info/tags/posix.html

http://www.microhowto.info/tags/c.html

http://pubs.opengroup.org/onlinepubs/9699919799/functions/dup.html

http://pubs.opengroup.org/onlinepubs/9699919799/functions/pipe.html

http://www.microhowto.info/howto/reap_zombie_processes_using_a_sigchld_handler.html

Reap zombie processes using a SIGCHLD handler

Content

1 Objective 2 Background 3 Scenario 4 Method o 4.1 Overview o 4.2 Define a handler for SIGCHLD that calls waitpid o 4.3 Register the SIGCHLD handler 5 Alternatives o 5.1 Explicitly set the SIGCHLD handler to SIG_IGN o 5.2 Set the SA_NOCLDWAIT flag 6 See also 7 Further reading

Tested on




Objective

To install a SIGCHLD handler for reaping zombie processes

Background

When a child process terminates it does not disappear entirely. Instead it becomes a ‘zombie process’ which is no longer capable of executing, but which still has a PID and an entry in the process table. This is indicated by the state code Z in ps or top.

The presence of a moderate number of zombie processes is not particularly harmful, but they add unnecessary clutter that can be confusing to the administrator. In extreme cases they could exhaust the number of available process table slots. For these reasons, well-behaved programs should ensure that zombie processes are removed in a timely manner.

The process of eliminating zombie processes is known as ‘reaping’. The simplest method is to call wait, but this will block the parent process if the child has not yet terminated. Alternatives are to use waitpid to poll or SIGCHLD to reap asynchronously. The method described here uses SIGCHLD.

http://www.microhowto.info/howto/reap_zombie_processes_using_a_sigchld_handler.html#idp64752












Scenario

Suppose you have written a network server which spawns a separate child process to handle each connection. The child process terminates itself when the connection closes, without any involvement from the parent process. It would be unacceptable for the parent process to block, therefore calling wait immediately after fork is not an option.

Method

Overview

The method described here has two steps:

1. Define a handler for SIGCHLD that calls waitpid.2. Register the SIGCHLD handler.

Note that the signal is named SIGCHLD with an H, as opposed to SIGCLD (which has a similar function, but potentially different semantics and is non-portable).


Header Used by

<signal.h> sigaction, sigemptyset, struct sigaction, SIGCHLD, SA_RESTART, SA_NOCLDSTOP

<stdio.h> perror

<stdlib.h> exit

<sys/wait.h> waitpid, pid_t, WNOHANG

Define a handler for SIGCHLD that calls waitpid

The operations that can be safely performed within a signal handler are very limited, but they include use of the waitpid function:

/*
 * SIGCHLD handler: reap every child that has already terminated, without
 * blocking.
 *
 * sig: the signal number delivered (unused).
 *
 * errno is saved on entry and restored on exit because waitpid may set it
 * (for example to ECHILD when there is nothing left to reap), which would
 * otherwise corrupt the value seen by whatever code the signal interrupted.
 */
void handle_sigchld(int sig) {
    int saved_errno = errno;
    (void)sig;
    // Loop: several children may have exited but only one SIGCHLD is queued.
    // WNOHANG makes waitpid return instead of waiting for a running child.
    while (waitpid((pid_t)(-1), 0, WNOHANG) > 0) {}
    errno = saved_errno;
}

The reason for calling waitpid as opposed to wait is to allow use of the WNOHANG option, which prevents the handler from blocking. This allows for the possibility of SIGCHLD being raised for reasons other than the termination of a child process.

(SIGCHLD has three conventional uses: to indicate that a child process has terminated, stopped or continued. The latter two conditions can be suppressed using SA_NOCLDSTOP as described below, but

that would not prevent a process with the right permissions from raising SIGCHLD for any reason using the kill function or an equivalent.)

The reason for placing waitpid within a loop is to allow for the possibility that multiple child processes could terminate while one is in the process of being reaped. Only one instance of SIGCHLD can be queued, so it may be necessary to reap several zombie processes during one invocation of the handler function.

The loop ensures that any zombies which existed prior to invocation of the handler function will be reaped. If any further zombies come into being after that moment in time then they may or may not be reaped by that invocation of the handler function (depending on the timing), but they should leave behind a pending SIGCHLD that will result in the handler being called again.

Register the SIGCHLD handler

The POSIX-recommended method for registering a signal handler is to use the sigaction function:

struct sigaction sa;sa.sa_handler = &handle_sigchld;sigemptyset(&sa.sa_mask);sa.sa_flags = SA_RESTART | SA_NOCLDSTOP;if (sigaction(SIGCHLD, &sa, 0) == -1) { perror(0); exit(1);}

You should do this before any child processes terminate, which in practice means registering before any are spawned. (POSIX neither requires nor prohibits SIGCHLD being raised in respect of a child that had already terminated when the handler was registered, so a program which relied on this happening might work but would not be portable.)

When an operating system function is interrupted by a signal the default behaviour is to return immediately (either with the error EINTR, or reporting partial completion if that is possible). This creates a need for such functions to be wrapped in a loop for the purpose of handling EINTR, which is both inconvenient and error-prone. Setting the SA_RESTART flag when the signal is registered makes this unnecessary in most cases, and is recommended unless you have a good reason not to.

Setting the SA_NOCLDSTOP flag prevents SIGCHLD from being raised when a child process stops or continues (as opposed to terminating). Since our interest is confined to processes that have terminated, there is no harm in this and it may prevent the handler being invoked unnecessarily. It does not obviate the need to use WNOHANG within the handler because it does not prevent SIGCHLD from being raised in some other way.

Alternatives

Explicitly set the SIGCHLD handler to SIG_IGN

If (as in the example above) the signal handler does nothing beyond calling waitpid then an alternative is available. Setting the SIGCHLD handler to SIG_IGN will cause zombie processes to be reaped automatically:

struct sigaction sa;sa.sa_handler = SIG_IGN;sigemptyset(&sa.sa_mask);sa.sa_flags = 0;if (sigaction(SIGCHLD, &sa, 0) == -1) { perror(0); exit(1);}

This can be implemented portably and somewhat more concisely with the signal function if you prefer:

if (signal(SIGCHLD, SIG_IGN) == SIG_ERR) { perror(0); exit(1);}

Note that it is not sufficient for SIGCHLD to have a disposition that causes it to be ignored (as the default, SIG_DFL, would do): it is only by setting it to SIG_IGN that this behaviour is obtained.

One drawback of this method is that it is slightly less portable than explicitly calling waitpid: the behaviour it depends on is required by POSIX.1-2001, and previously by the Single Unix Specification, but not by POSIX.1-1990.

Set the SA_NOCLDWAIT flag

Another way to achieve the same outcome is to set the SA_NOCLDWAIT flag when installing the signal handler:

struct sigaction sa;sa.sa_handler = &handle_sigchld;sigemptyset(&sa.sa_mask);sa.sa_flags = SA_RESTART | SA_NOCLDSTOP | SA_NOCLDWAIT;if (sigaction(SIGCHLD, &sa, 0) == -1) { perror(0); exit(1);}

Unfortunately this is not as useful as it could be, because it is implementation-defined whether SIGCHLD is raised in response to process termination when SA_NOCLDWAIT is set. Since you cannot rely on the handler function being invoked, it follows that the handler cannot actually do

anything if you want its behaviour to be portable. At that point you may as well set the handler to SIG_IGN, in which case there is arguably no need to set SA_NOCLDWAIT.

There is one small advantage to using SA_NOCLDWAIT: if it is supported at all then you can be reasonably confident that it will have the desired behaviour, whereas for SIG_IGN this is assured only if the operating system declares conformance to an appropriate version of POSIX or SUS.

See also

Capture the output of a child process in C

Further reading

wait, waitpid , Base Specifications Issue 7, The Open Group, 2008 <signal.h> , Base Specifications Issue 7, The Open Group, 2008

Calculate an Internet Protocol checksum in C

Content

1 Objective 2 Background 3 Scenario 4 Method o 4.1 Overview o 4.2 Implementation (optimised for clarity) o 4.3 Implementation (optimised for speed) 5 Testing 6 Variations o 6.1 Verifying a checksum o 6.2 Avoiding the use of memcpy o 6.3 Omitting the conversion between network and host byte order 7 Further reading

Tested on

Debian (Lenny)

Objective

To calculate an Internet Protocol checksum in C

Background

RFC 791 defines the following checksum algorithm for use when constructing the header of an IPv4 datagram:


http://www.microhowto.info/howto/calculate_an_internet_protocol_checksum_in_c.html#idp57632














http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html

http://pubs.opengroup.org/onlinepubs/9699919799/functions/wait.html

http://www.microhowto.info/howto/capture_the_output_of_a_child_process_in_c.html

The checksum field is the 16 bit one's complement of the one's complement sum of all 16 bit words in the header. For purposes of computing the checksum, the value of the checksum field is zero.

The same algorithm is used by a number of other IP-based protocols including TCP, UDP and ICMP. Implementation techniques are discussed in RFC 1071 , RFC 1141 and RFC 1624 .

Scenario

Suppose that you wish to send an ICMP echo request using a raw socket. Like all ICMP messages this contains a checksum that is calculated using the algorithm described above. Given the message to be sent, you wish to calculate the required checksum.

Method

Overview

The checksum can be calculated using the following algorithm:

1. Set the checksum field to zero.

2. Pad the data to an even number of bytes.

3. Reinterpret the data as a sequence of 16-bit unsigned integers that are in network byte order.

4. Calculate the sum of the integers, subtracting 0xffff whenever the result reaches 0x10000 or greater.

5. Calculate the bitwise complement of the sum. This is the required value of the checksum field.

One’s complement notation has two representations for the number zero: normal zero (0x0000 in this case) and negative zero (0xffff). It is not completely clear how these should be handled:

RFC 791 states only that one’s complement arithmetic should be used, and does not address the question of how zero is represented.

The incremental algorithm recommended by RFC 1624 always prefers normal zero over negative zero, and the text makes clear that this was an explicit design goal.

The non-incremental algorithm described in §4.1 of RFC 1071 behaves similarly, except in the special case where the data is all zeros (which can never occur in a valid IP datagram header). It would not be feasible for an incremental algorithm to replicate this idiosyncrasy.

In the interests of consistency, the implementations described here prefer normal zero over negative zero in all cases (even where the data is all zeros). This is achieved by initialising the accumulated sum to negative zero (0xffff), which makes no difference to the final result except in the case where nothing is added to it.




To exactly replicate the behaviour of the example given in RFC 1071, the accumulator should instead be initialised to normal zero (0x0000).

Implementation (optimised for clarity)

Here is a near-literal implementation of the algorithm described above:

/*
 * Calculate the Internet checksum of a buffer (clarity-first version).
 *
 * vdata:  start of the data, already in network byte order, with any
 *         checksum field inside it set to zero beforehand.
 * length: number of bytes to sum; an odd trailing byte is padded with a
 *         zero byte on the fly.
 *
 * Returns the checksum in network byte order, ready to be stored directly
 * into the checksum field. Because the accumulator starts at negative zero
 * (0xffff), an empty or all-zero buffer yields normal zero (0x0000).
 */
uint16_t ip_checksum(void* vdata,size_t length) {
    // Cast the data pointer to one that can be indexed.
    char* data=(char*)vdata;

    // Initialise the accumulator.
    uint32_t acc=0xffff;

    // Handle complete 16-bit blocks.
    // memcpy is used instead of a pointer cast to respect strict aliasing.
    for (size_t i=0;i+1<length;i+=2) {
        uint16_t word;
        memcpy(&word,data+i,2);
        acc+=ntohs(word);
        // One's complement addition: fold the carry back in immediately.
        if (acc>0xffff) {
            acc-=0xffff;
        }
    }

    // Handle any partial block at the end of the data.
    // The odd byte lands in the first (most significant, in network order)
    // byte of a zeroed word, which is equivalent to padding with a zero byte.
    if (length&1) {
        uint16_t word=0;
        memcpy(&word,data+length-1,1);
        acc+=ntohs(word);
        if (acc>0xffff) {
            acc-=0xffff;
        }
    }

    // Return the checksum in network byte order.
    return htons((uint16_t)~acc);
}

The data should be passed to the function in network byte order with the checksum field already zeroed. The result is returned in network byte order, so is ready to be written directly into the checksum field.

If there is an odd byte at the end of the data then this is treated as a special case so that padding can be done on the fly. The calls to memcpy are needed to avoid breaking the strict aliasing rules, which prevent an arbitrary type from being safely cast to a uint16_t.

Implementation (optimised for speed)

The following implementation uses two techniques to improve performance:

deferring carries until the end of the calculation by allowing the accumulator to exceed 0xffff, and

performing multiple additions in parallel.

/*
 * Calculate the Internet checksum of a buffer (speed-first version).
 *
 * Data is consumed in 32-bit blocks aligned to 4-byte addresses, and all
 * carries are accumulated in a 64-bit accumulator and folded only once at
 * the end.
 *
 * vdata:  start of the data, already in network byte order, with any
 *         checksum field inside it set to zero beforehand.
 * length: number of bytes to sum.
 *
 * Returns the checksum in network byte order. Because the accumulator
 * starts at negative zero (0xffff), an empty or all-zero buffer yields
 * normal zero (0x0000).
 */
uint16_t ip_checksum(void* vdata,size_t length) {
    // Cast the data pointer to one that can be indexed.
    char* data=(char*)vdata;

    // Initialise the accumulator.
    uint64_t acc=0xffff;

    // Handle any partial block at the start of the data.
    // The leading bytes are placed at the same position within a zeroed
    // 32-bit word that they occupy within their aligned 4-byte block.
    unsigned int offset=((uintptr_t)data)&3;
    if (offset) {
        size_t count=4-offset;
        if (count>length) count=length;
        uint32_t word=0;
        memcpy(offset+(char*)&word,data,count);
        acc+=ntohl(word);
        data+=count;
        length-=count;
    }

    // Handle any complete 32-bit blocks.
    char* data_end=data+(length&~3);
    while (data!=data_end) {
        uint32_t word;
        memcpy(&word,data,4);
        acc+=ntohl(word);
        data+=4;
    }
    length&=3;

    // Handle any partial block at the end of the data.
    if (length) {
        uint32_t word=0;
        memcpy(&word,data,length);
        acc+=ntohl(word);
    }

    // Handle deferred carries.
    // First fold 64 bits into at most 33, then repeatedly fold to 16 bits.
    acc=(acc&0xffffffff)+(acc>>32);
    while (acc>>16) {
        acc=(acc&0xffff)+(acc>>16);
    }

    // If the data began at an odd byte address
    // then reverse the byte order to compensate.
    // (Every byte was summed in the opposite half of its 16-bit word.)
    if (offset&1) {
        acc=((acc&0xff00)>>8)|((acc&0x00ff)<<8);
    }

    // Return the checksum in network byte order.
    return htons((uint16_t)~acc);
}

The maximum length of message that can be processed by this function is limited to approximately 16 gigabytes by the number of deferred carries that can be accumulated. In the unlikely event that this is insufficient then the upper half of the accumulator can be folded into the lower half as often as is necessary to prevent an overflow. This is more likely to be required when processing 16-bit blocks using a 32-bit accumulator, in which case only 128 kilobytes can be processed without the risk of overflow.

Testing

Here is an example of how an 8-byte ICMP echo request might be constructed using the icmphdr structure type provided by glibc:

struct icmphdr req;req.type=8;req.code=0;

req.checksum=0;req.un.echo.id=htons(0x1234);req.un.echo.sequence=htons(1);req.checksum=ip_checksum(&req,8);

The resulting message, as a hexadecimal byte stream, should be as follows:

08 00 E5 CA 12 34 00 01

Variations

Verifying a checksum

There are two ways in which checksums of the type described here can be verified:

by calculating what the checksum should be using the normal method, then comparing this to the value received, or

by calculating the checksum without first zeroing the checksum field, then comparing this with normal zero (0x0000).

The second method is likely to be simpler, quicker and more convenient in most cases. If you should decide to use the first method then some care is needed with regard to negative and normal zero. RFC 1624 recommends that either be accepted (in accordance with the robustness principle: be conservative in what you send, liberal in what you accept). This can be achieved by normalising the received checksum before performing the comparison.

(No special action is required when using the second method, provided that the checksum algorithm used to perform the verification consistently returns normal zero in preference to negative zero. A minor optimisation would be to omit the final inversion and compare the accumulator with negative zero.)

Avoiding the use of memcpy

If the data were presented to the checksum function as an array of uint16_t then the calls to memcpy could be omitted. There are two ways to achieve this. The safer method is to assemble the message within a union:

union { uint16_t words[740]; struct icmphdr icmp;} message;

This is allowed by C99, but not by C89 or C++. It has the disadvantage that the union must be constructed by the caller if copying is to be avoided, and this may not always be practicable.

The alternative is to reinterpret the data by means of a type cast. This would not normally be safe in any variant of C or C++, and would be quite likely to fall foul of the aliasing rules that are specified by C99. However in some compilation environments it can be made safe (or at least, less unsafe) by disabling strict application of the aliasing rules. In the case of GCC this is done using the -fno-strict-aliasing option or the may_alias attribute.

It should be noted that the removal of memcpy will not necessarily improve the performance of the checksum function because the compiler may already be able to achieve the same result without assistance. For example, GCC can do this in some cases when optimisation is enabled. It would be advisable to determine whether there is any benefit to be gained before making non-portable changes to the source code.

Omitting the conversion between network and host byte order

The checksum algorithm described here has the property that it works equally well when the upper and lower halves of each 16-bit block are reversed. For example, applying it to the sequence:

0x4500, 0x001c, 0x03de, 0x0000, 0x4001, 0x0000, 0x7f00, 0x0001, 0x7f00, 0x0001

gives a checksum of 0x7901, whereas applying it to:

0x0045, 0x1c00, 0xde03, 0x0000, 0x0140, 0x0000, 0x007f, 0x0100, 0x007f, 0x0100

gives 0x0179. This is due to the carry from the most significant byte of each block being fed back into the least significant byte and vice versa. It might therefore appear that the calls to ntohs and htons made above are redundant. This is almost, but not quite, correct.

The usual behaviour of ntohs is to either do nothing or reverse the byte order. In either of these cases the calls to ntohs and htons cancel out and could be removed. However POSIX states quite clearly that an arbitrary rearrangement of the bit pattern could occur, so if you want to be certain that the algorithm will behave as intended then an explicit conversion to host byte order is necessary.

Further reading

J. Postel, Internet Protocol - DARPA Internet Program Protocol Specification, STD 5, RFC 791, DARPA, September 1981

R. Braden, D. Borman and C. Partridge, Computing the Internet Checksum, RFC 1071, September 1988

T Mallory and A. Kullberg, Incremental Updating of the Internet Checksum, RFC 1141, January 1990

A. Rijsinghani, Computation of the Internet Checksum via Incremental Update, RFC 1624, May 1994





Send a UDP datagram in C

Content

1 Objective 2 Scenario 3 Method o 3.1 Overview o 3.2 Construct the remote socket address o 3.3 Create the client socket. o 3.4 Send the datagram o 3.5 Send the datagram (using sendto) o 3.6 Send the datagram (using sendmsg) 4 Variations o 4.1 Sending to the IPv4 broadcast address o 4.2 Replying to a datagram o 4.3 Connecting to a remote host 5 See also 6 Further Reading

Tested on

Debian (Lenny)

Objective

To send an outbound UDP datagram in C

Scenario

Suppose that you wish to write a client that implements the UDP-based variant of the Daytime Protocol, as defined by RFC 867

This is a very simple protocol whereby the client sends a datagram to the server, then the server responds with a datagram containing a human-readable copy of the current date and time. The datagram from the client is not required to have any particular content.

Method

Overview


1. Construct the remote socket address.2. Create a UDP socket.3. Send the datagram.


http://tools.ietf.org/html/rfc867

http://www.microhowto.info/howto/send_a_udp_datagram_in_c.html#idp92752















#include <errno.h>#include <string.h>#include <unistd.h>#include <netdb.h>#include <sys/socket.h>#include <netinet/in.h>

and if using sendmsg to send the datagram:

#include <sys/uio.h>

Construct the remote socket address

To send a UDP datagram it is necessary to specify the remote IP address and port number to which the connection should be directed. The combination of these two values is treated as a single entity called the socket address, which is represented by a struct sockaddr_in for IPv4 or a struct sockaddr_in6 for IPv6.

A local socket address may also be specified, however it is rarely necessary to do so. By default the local address is chosen automatically by the network stack.

Most common network services have an assigned port number on which they are normally expected to listen. It makes sense for the client to use this as a default, however it is important that an alternative can be selected. The user of the client will not necessarily have any control over how the server is configured, so the onus is on the client software to provide access to whichever port the server has been instructed to use.

It is often useful for the remote IP address to default to the loopback address, particularly for services such as databases where there is a good chance of the client and server being run on the same machine. Alternatively, it may be preferable to require that the destination be specified explicitly.

For most purposes the best way to construct the remote address is by calling getaddrinfo. This takes a string containing either a hostname or an IP address, and a second string containing either a service name or a port number. These are converted into a sockaddr_in or a sockaddr_in6 as appropriate:

const char* hostname=0; /* localhost */const char* portname="daytime";struct addrinfo hints;memset(&hints,0,sizeof(hints));hints.ai_family=AF_UNSPEC;hints.ai_socktype=SOCK_DGRAM;hints.ai_protocol=0;hints.ai_flags=AI_ADDRCONFIG;struct addrinfo* res=0;int err=getaddrinfo(hostname,portname,&hints,&res);if (err!=0) { die("failed to resolve remote socket address (err=%d)",err);

}

The hints argument contains additional information to help guide the conversion. In this example:

The address family has been left unspecified so that both IPv4 and IPv6 addresses can be returned. In principle you could receive results for other address families too: you can either treat this as a feature, or filter out any unwanted results after the call to getaddrinfo.

The socket type has been constrained to SOCK_DGRAM. This allows UDP but excludes TCP.

The protocol has been left unspecified because it is only meaningful in the context of a specific address family. If the address family had been set to AF_INET or AF_INET6 then this field could have been set to IPPROTO_UDP (but it is equally acceptable to leave it set to zero).

The AI_PASSIVE flag has not been set because the result is intended for use as a remote address, not as a local address. This causes the IP address to default to the loopback address (as opposed to the wildcard address).

The AI_ADDRCONFIG flag has been set so that IPv6 results will only be returned if the server has an IPv6 address, and similarly for IPv4.

The res argument is used to return a linked list of addrinfo structures containing the address or addresses that were found. If multiple records are returned then the recommended behaviour (from RFC 1123) is to try each address in turn, stopping when a successful outcome is achieved. This assumes that you have some way to distinguish success from failure, which may not always be the case, but if you are able to do this then you should. If not then an acceptable alternative is to use the first result and discard the remainder.

The memory occupied by the result list should be released by calling freeaddrinfo once it is no longer needed, however this cannot be done until after the datagram has been sent.

Create the client socket.

The socket that will be used to send the datagram should be created using the socket function. This takes three arguments:

1. the domain (AF_INET or AF_INET6 in this case, corresponding to IPv4 or IPv6 respectively),

2. the socket type (SOCK_DGRAM in this case, meaning that the socket should provide connectionless and potentially unreliable transfer of datagrams), and

3. the protocol (IPPROTO_UDP in this case, corresponding to UDP).

A value of 0 for the protocol requests the default for the given address family and socket type, which for AF_INET or AF_INET6 and SOCK_DGRAM would be IPPROTO_UDP. It is equally acceptable for the protocol to be deduced in this manner or specified explicitly.

Assuming you previously used getaddrinfo to construct the remote address then the required values can be obtained from the addrinfo structure:


int fd=socket(res->ai_family,res->ai_socktype,res->ai_protocol);if (fd==-1) { die("%s",strerror(errno));}

Send the datagram

Datagrams can be sent using any function that is capable of writing to a file descriptor, however unless you have connected the socket to a particular remote address (as described below) it is necessary to use either sendto or sendmsg so that a destination address can be specified. Of these sendmsg is the more flexible option, but at the cost of a significantly more complex interface. Details for each function are given below.


Send the datagram (using sendto)

To call sendto you must supply the content of the datagram and the remote address to which it should be sent:

if (sendto(fd,content,sizeof(content),0, res->ai_addr,res->ai_addrlen)==-1) { die("%s",strerror(errno));}


The value returned by sendto is the number of bytes sent, or -1 if there was an error. UDP datagrams are sent atomically, so unlike when writing to a TCP socket there is no need to wrap the function call in a loop to handle partially-sent data.

Send the datagram (using sendmsg)


struct iovec iov[1];iov[0].iov_base=content;iov[0].iov_len=sizeof(content);

struct msghdr message;message.msg_name=res->ai_addr;message.msg_namelen=res->ai_addrlen;message.msg_iov=iov;message.msg_iovlen=1;message.msg_control=0;

message.msg_controllen=0;





Variations

Sending to the IPv4 broadcast address

By default, attempts to send a datagram to the broadcast address are rejected with an error (typically EACCES, however it is not obvious from the POSIX specification which error should occur). This is a safety measure intended to reduce the risk of making unintended broadcasts. It can be overridden by setting the SO_BROADCAST socket option:

int broadcast=1;if (setsockopt(fd,SOL_SOCKET,SO_BROADCAST, &broadcast,sizeof(broadcast))==-1) { die("%s",strerror(errno));}

Replying to a datagram

When replying to a UDP datagram the response should normally be sent to the IP address and port number from which the request originated. This can be arranged by capturing the source address of the request using recvfrom or recvmsg, then passing it to sendto or sendmsg as the destination address for the response.

There is also the question of where the response should be sent from. In most cases the best choice will be from the port and IP address to which the request was directed. This is not a requirement of the User Datagram Protocol itself, however there are several reasons why it is desirable:

Generic firewalls and NAT gateways normally use both source and destination port numbers and IP addresses for connection tracking (as per RFC 2663 ) so will fail to associate the response with the request if it is not sent from the appropriate port and IP address.


The behaviour of the connect function in relation to UDP strongly encourages the assumption that any response will originate from a matching IP address and port number. When a UDP socket is in the connected state, datagrams from any other source are rejected.

RFC 1123 recommends (but does not require) that when replying to a UDP datagram on a multihomed host, the response should be sent from the IP address to which the request was directed.

Some application-layer protocols (such as DNS) explicitly require that replies be sent from a matching port.

An exception would be where the application-layer protocol explicitly requires or allows the response to originate from a different port (for example, as is the case for TFTP).

Replying from a matching port number can be achieved very easily by sending the response using the socket that received the request. This method will reply from a matching IP address if the socket is bound to a specific address, but not necessarily if it is bound to the wildcard address and the server is multihomed.

Unfortunately the POSIX API does not provide a satisfactory way to reply from a matching IP address in a portable manner. Briefly, the available options include:

using a non-portable mechanism such as IP_PKTINFO or the combination of IP_RECVDSTADDR and IP_SENDSRCADDR to obtain and set the local IP address,

binding a separate socket to each local IP address, having non-portably obtained a list of addresses using a mechanism such as SIOCGIFCONF, or

sending the response from the wildcard address in cases where use of a matching address is non-mandatory, accepting that there are some use cases in which this will fail.

This is a substantial topic in its own right and will be the subject of a future microHOWTO.

Connecting to a remote host

When exchanging many datagrams with a particular remote host it may be beneficial for a UDP socket to be connected to that host. This removes the need for the remote address to be explicitly checked every time a datagram is received, and for the address to be specified every time one is sent. The connection is made using the connect function:

/*
 * Connect the UDP socket to the remote address. res is the addrinfo result
 * returned by getaddrinfo; its ai_addrlen member gives the true length of
 * the socket address (sizeof applied to a pointer would give the size of
 * the pointer, not of the address it refers to).
 */
if (connect(fd,res->ai_addr,res->ai_addrlen)==-1) {
    die("%s",strerror(errno));
}

This is superficially identical to the call that would be made to establish a TCP connection, however unlike TCP there is no handshake. This has two notable consequences:

Calling connect on a UDP socket does not (by itself) result in any network activity.


The call to connect will succeed even if the remote machine is unreachable or nonexistent.

A UDP socket in the connected state will only receive datagrams that originate from the given remote address. It is therefore feasible to use functions such as read or recv in place of recvfrom. Similarly the given remote address becomes the default for outgoing datagrams, therefore it is feasible to use write or send in place of sendto. (Being connected does not, however, prevent you from sending datagrams to arbitrary destinations using sendto if you so wish.)

See also

Listen for and receive UDP datagrams in C Establish a TCP connection in C Send an arbitrary IPv4 datagram using a raw socket in C

Further Reading

W. Richard Stevens et al, Unix Network Programming, Volume 1: The Sockets Networking API, 3rd edition, Addison-Wesley, 2003

The Open Group, sendto, Base Specifications Issue 6 The Open Group, sendmsg, Base Specifications Issue 6

Listen for and receive UDP datagrams in C

Content

1 Objective 2 Scenario 3 Method o 3.1 Overview o 3.2 Construct the local socket address o 3.3 Create the socket. o 3.4 Bind the local address to the socket o 3.5 Receive and handle datagrams as they arrive o 3.6 Receive and handle datagrams as they arrive using recvfrom o 3.7 Receive and handle datagrams as they arrive using recvmsg 4 Variations o 4.1 Listening for a reply o 4.2 Connecting to a remote host o 4.3 Determining the local address 5 See also 6 Further Reading

Tested on

Debian (Lenny)

Ubuntu (Lucid)

http://www.microhowto.info/howto/listen_for_and_receive_udp_datagrams_in_c.html#idp95328


















http://pubs.opengroup.org/onlinepubs/009695399/functions/sendmsg.html

http://pubs.opengroup.org/onlinepubs/009695399/functions/sendto.html



http://www.microhowto.info/howto/listen_for_and_receive_udp_datagrams_in_c.html

Objective

To listen for and receive inbound UDP datagrams in C

Scenario

Suppose that you wish to write a server that implements the UDP-based variant of the Daytime Protocol, as defined by RFC 867

This is a very simple protocol whereby the client sends a datagram to the server, then the server responds with a datagram containing a human-readable copy of the current date and time. The datagram from the client is not required to have any particular content.

Method

Overview

The method described here has four steps:

1. Construct the local socket address.2. Create the socket.3. Bind the local address to the socket.4. Receive and handle datagrams as they arrive.

This is the appropriate procedure when listening for unsolicited datagrams, as in the scenario described above. See below for how it can be adapted to:

listening for a reply to a datagram that you have sent, or exchanging many datagrams with a particular remote host.



and if using recvmsg to receive datagrams:

#include <sys/uio.h>


Construct the local socket address

In order to listen for UDP datagrams it is necessary to choose a port number and, optionally, a local IP address on which to listen. The combination of these two values is treated as a single entity called the socket address, which is represented by a struct sockaddr_in for IPv4 or a struct sockaddr_in6 for IPv6.

Most common network services have an assigned port number on which they are normally expected to listen. While it makes sense to use this as the default, it is good practice to make the port number configurable. Possible reasons for wanting to override the assigned port number include:

running multiple instances of a network service on the same machine, running a network service that would normally use a well-known port number from a non-

root account, or making port scanning more time-consuming than it would be if the standard port number

were used.

The local IP address should normally default to either the wildcard address or the loopback address, but like the port number it is good practice to make it configurable. When a service is bound to a particular IP address it will only accept connections directed to that address, whereas when bound to the wildcard address it will accept connections to any local address. Binding to the loopback address has the effect of prohibiting connections from other machines.

For most purposes the best way to construct the socket address is by calling getaddrinfo. This takes a string containing the IP address and a string containing the port number, and converts them into a sockaddr_in or a sockaddr_in6 as appropriate. It is also able to resolve hostnames and service names:

const char* hostname=0; /* wildcard */const char* portname="daytime";struct addrinfo hints;memset(&hints,0,sizeof(hints));hints.ai_family=AF_UNSPEC;hints.ai_socktype=SOCK_DGRAM;hints.ai_protocol=0;hints.ai_flags=AI_PASSIVE|AI_ADDRCONFIG;struct addrinfo* res=0;int err=getaddrinfo(hostname,portname,&hints,&res);if (err!=0) { die("failed to resolve local socket address (err=%d)",err);}



The socket type has been constrained to SOCK_DGRAM. This allows UDP but excludes TCP.

The protocol has been left unspecified because it is only meaningful in the context of a specific address family. If the address family had been set to AF_INET or AF_INET6 then this field could have been set to IPPROTO_UDP (but it is equally acceptable to leave it set to zero).

The AI_PASSIVE flag has been set because the address is intended for use by a server. It causes the IP address to default to the wildcard address as opposed to the loopback address.


The res argument is used to return a linked list of addrinfo structures containing the address or addresses that were found. If the network service daemon has the ability to listen on multiple sockets then it should open one for each address in the list. Otherwise it is considered acceptable to use the first result and discard the remainder.

The memory occupied by the result list should be released by calling freeaddrinfo once it is no longer needed, however this cannot be done until after the socket has been created and bound.

Create the socket.

The socket that will be used to listen for inbound datagrams should be created using the socket function. This takes three arguments:

1. the domain (AF_INET or AF_INET6 in this case, corresponding to IPv4 or IPv6 respectively),

2. the socket type (SOCK_DGRAM in this case, meaning that the socket should provide connectionless and potentially unreliable transfer of datagrams), and

3. the protocol (IPPROTO_UDP in this case, corresponding to UDP).

A value of 0 for the protocol requests the default for the given address family and socket type, which for AF_INET or AF_INET6 and SOCK_DGRAM would be IPPROTO_UDP. It is equally acceptable for the protocol to be deduced in this manner or specified explicitly.



Bind the local address to the socket

As noted previously, the server socket must be bound to a local address before it can listen for inbound datagrams. This should be done using the bind function:

if (bind(fd,res->ai_addr,res->ai_addrlen)==-1) {

die("%s",strerror(errno));}

The first argument is the socket descriptor. The second and third arguments are the local address and its length.

If the local address was constructed using getaddrinfo then the memory occupied by the address list can now be released:

freeaddrinfo(res);

(If the address list has been searched or filtered then take care that it is the head of the list that is released, not the address that you have chosen to use.)

Receive and handle datagrams as they arrive

Datagrams can be received using any function that is capable of reading from a file descriptor, however if you are listening for unsolicited datagrams (as in this example) then you will normally want to know where each datagram originated from so that it can be replied to. This information is provided by the functions recvfrom and recvmsg. Of these recvmsg is the more flexible option, but at the cost of a significantly more complex interface. Details for each function are given below.

Regardless of which function you choose you will need to supply a buffer to receive the data. If this is too small to accommodate a complete datagram then any excess is discarded. That means you need not be concerned about tracking datagram boundaries, because the first byte returned by a read operation will always be the start of a datagram. However it does raise two issues: how the buffer size should be chosen, and how any overflow can be detected.

UDP-based application-layer protocols often limit the size of datagram that can be sent in order to provide a solution to the first issue. For example, TFTP and DNS each have a fixed maximum payload size of 512 bytes. For DHCP the limit defaults to 548 bytes, but a larger value can be negotiated if both parties are willing to support it.

In the absence of such guidance it is necessary to consider what the transport, network and link layer protocols are likely to support. The maximum payload size for UDP over IPv4 is 65507 bytes, and for IPv6 with jumbogram support it is close to 4 gigabytes. However, the largest payload that an implementation is required to support is 548 bytes for IPv4 and 1452 bytes for IPv6. On an Ethernet with the standard MTU of 1500 bytes, the largest payload that can be sent without fragmentation is 1472 bytes. On this basis, 1472 bytes would be a reasonable choice if you have no reason to believe that a larger buffer is needed or that a smaller buffer would suffice.

It is possible to receive arbitrary-length datagrams with assistance from the MSG_PEEK option, however if you choose to do this then it would be prudent to set an upper limit in order to prevent denial of service attacks.

The recvmsg function explicitly reports truncation by setting the MSG_TRUNC flag in the msg_flags member of the message header. Alternatively, truncation can be detected when using any of the available functions by providing a buffer that is one byte longer than the largest payload that you actually wish to receive, then interpreting a full buffer as a truncated datagram.

Receive and handle datagrams as they arrive using recvfrom

To call recvfrom you need a buffer for the datagram and a buffer for the remote address:

char buffer[549];struct sockaddr_storage src_addr;socklen_t src_addr_len=sizeof(src_addr);ssize_t count=recvfrom(fd,buffer,sizeof(buffer),0,(struct sockaddr*)&src_addr,&src_addr_len);if (count==-1) { die("%s",strerror(errno));} else if (count==sizeof(buffer)) { warn("datagram too large for buffer: truncated");} else { handle_datagram(buffer,count);}

The fourth argument is for specifying flags which modify the behaviour of recvfrom, none of which are needed in this example.

The value returned by recvfrom is the number of bytes received, or -1 if there was an error. Truncation is detected in this example using the technique described above of providing a slightly over-sized datagram buffer.

Receive and handle datagrams as they arrive using recvmsg

To call recvmsg, in addition to buffers for the datagram and remote address you must also construct an iovec array and a msghdr structure:

char buffer[548];struct sockaddr_storage src_addr;

struct iovec iov[1];iov[0].iov_base=buffer;iov[0].iov_len=sizeof(buffer);

struct msghdr message;message.msg_name=&src_addr;message.msg_namelen=sizeof(src_addr);message.msg_iov=iov;message.msg_iovlen=1;message.msg_control=0;message.msg_controllen=0;

ssize_t count=recvmsg(fd,&message,0);if (count==-1) { die("%s",strerror(errno));} else if (message.msg_flags&MSG_TRUNC) { warn("datagram too large for buffer: truncated");} else { handle_datagram(buffer,count);}


The msghdr structure exists to bring the number of arguments to recvmsg and sendmsg down to a manageable number. On entry to recvmsg it specifies where the source address, the datagram payload and any ancillary data should be stored. In this example no ancillary data has been requested, therefore no provision has been made for receiving any.

The msg_flags field of the msghdr structure is used by recvmsg to return flags to the caller. These include the MSG_TRUNC flag, which on exit will be set if the datagram was truncated or clear if it was not. If you wish to pass any flags into recvmsg then this cannot be done using msg_flags, which is ignored on entry. Instead you must pass them using the third argument to recvmsg (which is zero in this example).

Variations

Listening for a reply

When listening for a reply to a datagram that you have sent then three of the four steps listed above may be omitted:

You can (and normally should) listen for the reply using the same socket from which the request was sent.

The act of sending the request will have bound the socket to an unused port number. This will have been used as the source of the request, so should match the destination of the reply. The socket is therefore correctly bound to receive the reply.

Connecting to a remote host

When exchanging many datagrams with a particular remote host it may be beneficial for a UDP socket to be connected to that host. This removes the need for the remote address to be explicitly checked every time a datagram is received, and for the address to be specified every time one is sent. The connection is made using the connect function:

if (connect(fd,remote_addr,sizeof(remote_addr))==-1) { die("%s",strerror(errno));

}

This is superficially identical to the call that would be made to establish a TCP connection, however unlike TCP there is no handshake. This has two notable consequences:

Calling connect on a UDP socket does not (by itself) result in any network activity. The call to connect will succeed even if the remote machine is unreachable or nonexistent.

A UDP socket in the connected state will only receive datagrams that originate from the given remote address. It is therefore feasible to use functions such as read or recv in place of recvfrom. Similarly the given remote address becomes the default for outgoing datagrams, therefore it is feasible to use write or send in place of sendto. (Being connected does not, however, prevent you from sending datagrams to arbitrary destinations using sendto if you so wish.)

Determining the local address

When replying to a datagram on a multihomed host, RFC 1123 recommends that the source address of the reply should match the destination address of the corresponding request. Unfortunately the POSIX API does not provide a satisfactory way to achieve this in a portable manner. Briefly, the available options include:

using a non-portable mechanism to obtain the address, such as IP_RECVDSTADDR or IP_PKTINFO, if one is available,

binding a separate socket to each local IP address, having non-portably obtained a list of addresses using a mechanism such as SIOCGIFCONF, or

sending the response from the wildcard address in cases where use of a matching address is non-mandatory, accepting that there are some use cases in which this will fail.

This is a substantial topic in its own right and will be the subject of a future microHOWTO.

See also

Send a UDP datagram in C Listen for and accept TCP connections in C

Further Reading

W. Richard Stevens et al, Unix Network Programming, Volume 1: The Sockets Networking API, 3rd edition, Addison-Wesley, 2003

The Open Group, recvfrom, Base Specifications Issue 6 The Open Group, recvmsg, Base Specifications Issue 6

Establish a TCP connection in C

Content

1 Objective 2 Scenario 3 Method o 3.1 Overview o 3.2 Construct the remote socket address o 3.3 Create the client socket o 3.4 Connect the socket to the remote address. 4 See also

http://pubs.opengroup.org/onlinepubs/009695399/functions/recvmsg.html

http://pubs.opengroup.org/onlinepubs/009695399/functions/recvfrom.html

http://www.microhowto.info/howto/establish_a_tcp_connection_in_c.html#idp45456








http://www.microhowto.info/howto/listen_for_and_accept_tcp_connections_in_c.html



5 Further Reading

Tested on

Debian (Lenny)

Ubuntu (Precise)

Objective

To establish an outbound TCP connection in C

Scenario

Suppose that you wish to write a client that implements the TCP-based variant of the Daytime Protocol, as defined by RFC 867.

This is a very simple protocol whereby the server sends a human-readable copy of the current date and time then closes the connection. The client is not required to send any data, and anything it does send is ignored.

Method

Overview


1. Construct the remote socket address.

2. Create the client socket.

3. Connect the socket to the remote address.



Construct the remote socket address

To establish an outbound TCP connection it is necessary to specify the remote IP address and port number to which the connection should be directed. The combination of these two values is treated as a single entity called the socket address, which is represented by a struct sockaddr_in for IPv4 or a struct sockaddr_in6 for IPv6.



(A local socket address may also be specified, however it is rarely necessary to do so. By default the local address is chosen automatically by the network stack.)

Most common network services have an assigned port number on which they are normally expected to listen. It makes sense for the client to use this as the default, however it is important that an alternative can be selected. The user of the client will not necessarily have any control over how the server is configured, so the onus is on the client software to provide access to whichever port the server has been instructed to use.

It is often useful for the remote IP address to default to the loopback address, particularly for services such as databases where there is a good chance of the client and server being run on the same machine. Alternatively, it is sometimes preferable to require that the destination be specified explicitly.

For most purposes the best way to construct the remote address is by calling getaddrinfo. This takes a string containing either a hostname or an IP address, and a second string containing either a service name or a port number. These are converted into a sockaddr_in or a sockaddr_in6 as appropriate:

const char* hostname=0; /* localhost */const char* portname="daytime";struct addrinfo hints;memset(&hints,0,sizeof(hints));hints.ai_family=AF_UNSPEC;hints.ai_socktype=SOCK_STREAM;hints.ai_protocol=0;hints.ai_flags=AI_ADDRCONFIG;struct addrinfo* res=0;int err=getaddrinfo(hostname,portname,&hints,&res);if (err!=0) { die("failed to resolve remote socket address (err=%d)",err);}



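The socket type has been constrained to SOCK_STREAM. This allows TCP but excludes UDP.

The protocol has been left unspecified because it is only meaningful in the context of a specific address family. If the address family had been set to AF_INET or AF_INET6 then this field could have been set to IPPROTO_TCP (but it is equally acceptable to leave it set to zero).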


The AI_PASSIVE flag has not been set because the result is intended for use as a remote address. Its absence causes the IP address to default to the loopback address (as opposed to the wildcard address).


The res argument is used to return a linked list of addrinfo structures containing the address or addresses that were found. If multiple records are returned then the recommended behaviour (from RFC 1123) is to try each address in turn, stopping when a connection is successfully established. When doing this you may wish to limit the number of addresses tried and/or allow connection attempts to overlap, in order to prevent the cumulative timeout period from becoming excessive.

The memory occupied by the result list should be released by calling freeaddrinfo once it is no longer needed, however this cannot be done until after the socket has been connected.

Create the client socket

The socket that will be used to establish the connection should be created using the socket function. This takes three arguments:

1. the domain (AF_INET or AF_INET6 in this case, corresponding to IPv4 or IPv6 respectively),

2. the socket type (SOCK_STREAM in this case, meaning that the socket should provide reliable transport of an unstructured byte stream), and

3. the protocol (IPPROTO_TCP in this case, corresponding to TCP).

A value of 0 for the protocol requests the default for the given address family and socket type, which for AF_INET or AF_INET6 and SOCK_STREAM would be IPPROTO_TCP. It is equally acceptable for the protocol to be deduced in this manner or specified explicitly.



When iterating through a list of addresses returned by getaddrinfo it is potentially necessary to create a separate socket for each, because the addresses will not necessarily be members of the same address family or use the same protocol.

Connect the socket to the remote address.

A connection is established by calling the connect function:

if (connect(fd,res->ai_addr,res->ai_addrlen)==-1) { die("%s",strerror(errno));}

The first argument is the socket descriptor. The second and third arguments are the remote socket address and its length.


By default the connect function blocks until the initial TCP handshake has been completed and the socket is ready for use, or alternatively, until the connection attempt fails. Some types of connection failure are reported very quickly, whereas others can only be detected by means of a timeout. In the latter case connect may block for several minutes.

If the remote address was constructed using getaddrinfo then the memory occupied by the address list can now be released:

freeaddrinfo(res);


The socket descriptor is now ready for use. Here is an example of how it might be utilised to implement a Daytime Protocol client:

char buffer[256];for (;;) { ssize_t count=read(fd,buffer,sizeof(buffer)); if (count<0) { if (errno!=EINTR) die("%s",strerror(errno)); } else if (count==0) { break; } else { write(STDOUT_FILENO,buffer,count); }}close(fd);

See also

Listen for and accept TCP connections in C Send a UDP datagram in C Send an arbitrary IPv4 datagram using a raw socket in C

Further Reading

Listen for and accept TCP connections in C , microHOWTO

Listen for and accept TCP connections in C

Content

1 Objective 2 Scenario 3 Method o 3.1 Overview o 3.2 Construct the local socket address o 3.3 Create the server socket

http://www.microhowto.info/howto/listen_for_and_accept_tcp_connections_in_c.html#idp32976










o 3.4 Set the SO_REUSEADDR socket option o 3.5 Bind the local address to the server socket o 3.6 Listen for connections o 3.7 Accept connections as they arrive 4 Variations o 4.1 Determining the remote address o 4.2 Constructing the local socket address without using

getaddrinfo 5 See also 6 Further Reading

Tested on

Debian (Lenny)

Ubuntu (Trusty)

Objective

To listen for and accept inbound TCP connections in C

Scenario

Suppose that you wish to write a daemon that implements the TCP-based variant of the Daytime Protocol, as defined by RFC 867.

This is a very simple protocol whereby the server sends a human-readable copy of the current date and time then closes the connection. Any data that the client might send is ignored.

Method

Overview

The method described here has six steps:

1. Construct the local socket address.

2. Create the server socket.

3. Set the SO_REUSEADDR socket option.

4. Bind the local address to the server socket.

5. Listen for inbound connections.

6. Accept connections as they arrive.


#include <errno.h>#include <string.h>












#include <unistd.h>#include <netdb.h>#include <sys/socket.h>#include <netinet/in.h>

Construct the local socket address

In order to listen for TCP connections it is necessary to choose a port number and, optionally, a local IP address on which to listen. The combination of these two values is treated as a single entity called the socket address, which is represented by a struct sockaddr_in for IPv4 or a struct sockaddr_in6 for IPv6.

Most common network services have an assigned port number on which they are normally expected to listen. While it makes sense to use this as the default, it is good practice to make the port number configurable. Possible reasons for wanting to override the assigned port number include:

running multiple instances of a network service on the same machine,

running a network service that would normally use a well-known port number from a non-root account, or

making port scanning more time-consuming than it would be if the standard port number were used.

The local IP address should normally default to either the wildcard address or the loopback address, but like the port number it is good practice to make it configurable. When a service is bound to a particular IP address it will only accept connections directed to that address, whereas when bound to the wildcard address it will accept connections to any local address. Binding to the loopback address has the effect of prohibiting connections from other machines.

For most purposes the best way to construct the socket address is by calling getaddrinfo. This takes a string containing the IP address and a string containing the port number, and converts them into a sockaddr_in or a sockaddr_in6 as appropriate. It is also able to resolve hostnames and service names:

const char* hostname=0; /* wildcard */const char* portname="daytime";struct addrinfo hints;memset(&hints,0,sizeof(hints));hints.ai_family=AF_UNSPEC;hints.ai_socktype=SOCK_STREAM;hints.ai_protocol=0;hints.ai_flags=AI_PASSIVE|AI_ADDRCONFIG;struct addrinfo* res=0;int err=getaddrinfo(hostname,portname,&hints,&res);if (err!=0) { die("failed to resolve local socket address (err=%d)",err);}



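The socket type has been constrained to SOCK_STREAM. This allows TCP but excludes UDP.

The protocol has been left unspecified because it is only meaningful in the context of a specific address family. If the address family had been set to AF_INET or AF_INET6 then this field could have been set to IPPROTO_TCP (but it is equally acceptable to leave it set to zero).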


The AI_PASSIVE flag has been set because the address is intended for binding to a server socket. It causes the IP address to default to the wildcard address as opposed to the loopback address.


The res argument is used to return a linked list of addrinfo structures containing the address or addresses that were found. If the network service daemon has the ability to listen on multiple sockets then it should open one for each address in the list. Otherwise it is considered acceptable to use the first result and discard the remainder.

The memory occupied by the result list should be released by calling freeaddrinfo once it is no longer needed, however this cannot be done until after the socket has been created and bound.

Create the server socket

The socket that will be used to listen for connections should be created using the socket function. This takes three arguments:

1. the domain (AF_INET or AF_INET6 in this case, corresponding to IPv4 or IPv6 respectively),

2. the socket type (SOCK_STREAM in this case, meaning that the socket should provide reliable transport of an unstructured byte stream), and

3. the protocol (IPPROTO_TCP in this case, corresponding to TCP).

A value of 0 for the protocol requests the default for the given address family and socket type, which for AF_INET or AF_INET6 and SOCK_STREAM would be IPPROTO_TCP. It is equally acceptable for the protocol to be deduced in this manner or specified explicitly.

Assuming you previously used getaddrinfo to construct the local address then the required values can be obtained from the addrinfo structure:

int server_fd=socket(res->ai_family,res->ai_socktype,res->ai_protocol);if (server_fd==-1) { die("%s",strerror(errno));}

Set the SO_REUSEADDR socket option

SO_REUSEADDR should be routinely set for TCP server sockets in order to allow the network service to be restarted when there are connections in the ESTABLISHED or TIME-WAIT state:

int reuseaddr=1;if (setsockopt(server_fd,SOL_SOCKET,SO_REUSEADDR,&reuseaddr,sizeof(reuseaddr))==-1) { die("%s",strerror(errno));}

See ‘Listen on a TCP port with connections in the TIME-WAIT state’ for a detailed discussion of this issue.

Bind the local address to the server socket

As noted previously, the server socket must be bound to a local address before it can listen for connections. This should be done using the bind function:

if (bind(server_fd,res->ai_addr,res->ai_addrlen)==-1) { die("%s",strerror(errno));}

The first argument is the socket descriptor. The second and third arguments are the local address and its length.

If the local address was constructed using getaddrinfo then the memory occupied by the address list can now be released:

freeaddrinfo(res);


Listen for connections

The server socket can now be instructed to listen for connections. This should be done using the listen function:

if (listen(server_fd,SOMAXCONN)) { die("failed to listen for connections (errno=%d)",errno);}

The first argument is the socket descriptor. The second argument is the backlog of outstanding connections that the operating system should queue while they are waiting to be accepted by the server process. It is only a hint: most implementations take some account of the value requested,

http://www.microhowto.info/howto/listen_on_a_tcp_port_with_connections_in_the_time_wait_state.html

but you should not make any assumptions. A value of SOMAXCONN indicates that the maximum permissible queue length should be selected.

The optimum value for the backlog depends on the nature of the load:

If the value is too low then the server will be poor at handling short-term bursts of activity. Connections may be rejected even if the average load is well below what the server can handle.

If the value is too high then the server will perform less well when it is genuinely overloaded. Under those circumstances, lengthening the queue merely increases latency without improving capacity.

A backlog of 5 is a popular choice due to its use in many tutorials. For services that receive connections at a very slow rate this is probably adequate, but it is too low for services that handle many short-lived connections (such as web servers). In that case the author's advice would be to make the value configurable, with a default of SOMAXCONN.

Accept connections as they arrive

Connections are accepted by the server process by repeatedly calling the accept function. Each time this is done a new socket descriptor is returned to act as an endpoint for the newly established connection. If no connections are available then the function blocks.

The process of handling a connection should preferably not interfere with the acceptance or handling of other connections. One way to ensure this is to spawn a new child process for each connection:

for (;;) { int session_fd=accept(server_fd,0,0); if (session_fd==-1) { if (errno==EINTR) continue; die("failed to accept connection (errno=%d)",errno); } pid_t pid=fork(); if (pid==-1) { die("failed to create child process (errno=%d)",errno); } else if (pid==0) { close(server_fd); handle_session(session_fd); close(session_fd); _exit(0); } else { close(session_fd); }}

The parent process should close the descriptor for each connected socket once the corresponding child process has been spawned. There are two reasons for doing this: to prevent the descriptors from accumulating, and to prevent the connection from being held open by the parent after it has been closed by the child. Similarly, the child process should close any file or socket descriptors inherited from the parent that it does not need access to. This will certainly include the descriptor for the server socket, but you should consider whether there are any others.

Functionality that is specific to the network service is represented here by the function handle_session. As a simple example, here is an implementation of the Daytime Protocol:

/*
 * Serve one Daytime Protocol session: format the current local date and
 * time as a single CRLF-terminated line and write it to session_fd.
 *
 * session_fd: descriptor of the connected socket. It is not closed here;
 *             that is left to the caller.
 *
 * If the timestamp cannot be produced (localtime fails, or strftime
 * reports that the result does not fit), an error line is sent instead so
 * that the client always receives a response. A write failure other than
 * EINTR is reported through die().
 */
void handle_session(int session_fd) {
    time_t now = time(0);
    char buffer[80];
    size_t length = 0;

    /* localtime may return a null pointer, which must not reach strftime. */
    struct tm *local = localtime(&now);
    if (local) {
        length = strftime(buffer, sizeof(buffer), "%a %b %d %T %Y\r\n", local);
    }
    if (length == 0) {
        /* strftime leaves the buffer contents unspecified when it returns 0,
           so replace them and take the length of the replacement; otherwise
           the loop below would have nothing to send. */
        int written = snprintf(buffer, sizeof(buffer), "Error: buffer overflow\r\n");
        length = (written > 0) ? (size_t)written : 0;
    }

    /* write may accept fewer bytes than requested, so loop until the whole
       line has been sent, retrying calls interrupted by a signal. */
    size_t index = 0;
    while (index < length) {
        ssize_t count = write(session_fd, buffer + index, length - index);
        if (count < 0) {
            if (errno == EINTR) continue;
            die("failed to write to socket (errno=%d)", errno);
        } else {
            index += count;
        }
    }
}

Variations

Determining the remote address

It is often desirable and sometimes necessary to determine the remote address from which an inbound connection originated. A common reason for wanting to do this is to keep a log of all connections. Other possible motivations include access control, or establishing an outbound connection back to the client.

The address can be obtained at the time when the connection is accepted by supplying a buffer to place it in. Alternatively, it can be obtained at any time while the connection is open by calling getpeername.

The supplied buffer must be large enough and sufficiently well-aligned to accept any socket address that might be returned. If the address family has not been hard-coded then you can use the type struct sockaddr_storage, which is designed to hold addresses of any type:

struct sockaddr_storage sa;socklen_t sa_len=sizeof(sa);int session_fd=accept(server_fd,(struct sockaddr*)&sa,&sa_len);

Alternatively, if the local address was constructed using getaddrinfo then the required size in bytes can be found in the ai_addrlen member of the relevant addrinfo structure.

If there is a need to convert the address to human-readable form then this is best done using the getnameinfo function, especially if it is not known whether the address family is IPv4 or IPv6:

char buffer[INET6_ADDRSTRLEN];int err=getnameinfo((struct sockaddr*)&sa,sa_len,buffer,sizeof(buffer),0,0,NI_NUMERICHOST);

if (err!=0) { snprintf(buffer,sizeof(buffer),"invalid address");}

A useful refinement is to convert IPv4-mapped addresses into plain IPv4 addresses prior to calling getnameinfo:

if (sa.ss_family==AF_INET6) { struct sockaddr_in6* sa6=(struct sockaddr_in6*)&sa; if (IN6_IS_ADDR_V4MAPPED(&sa6->sin6_addr)) { struct sockaddr_in sa4; memset(&sa4,0,sizeof(sa4)); sa4.sin_family=AF_INET; sa4.sin_port=sa6->sin6_port; memcpy(&sa4.sin_addr.s_addr,sa6->sin6_addr.s6_addr+12,4); memcpy(&sa,&sa4,sizeof(sa4)); sa_len=sizeof(sa4); }}

For example, if an IPv4 connection from 192.168.0.1 were received using an IPv6 socket then the code fragment above would cause the address to be presented as 192.168.0.1 instead of the less readable ::ffff:192.168.0.1.

Constructing the local socket address without using getaddrinfo

There are some circumstances where getaddrinfo is not the best way to construct the local socket address. For example, you may already have the port number and IP address in numeric form, or you may need to be compatible with older systems on which getaddrinfo is not available. A solution in these cases is to construct the socket address explicitly.

An IPv4 socket address is represented by a struct sockaddr_in. It should be zeroed before use, and any information within it should be stored in network byte order. For example, to create a socket address with a port number of 13 and the wildcard IP address:

struct sockaddr_in addr;memset(&addr,0,sizeof(addr));addr.sin_family=AF_INET;addr.sin_port=htons(13);addr.sin_addr.s_addr=htonl(INADDR_ANY);

Similarly for IPv6:

struct sockaddr_in6 addr;memset(&addr,0,sizeof(addr));addr.sin6_family=AF_INET6;addr.sin6_flowinfo=0;addr.sin6_port=htons(13);addr.sin6_addr=in6addr_any;

See also

Listen on a TCP port with connections in the TIME-WAIT state Establish a TCP connection in C Listen for and receive UDP datagrams in C

Further Reading

Listen on a TCP port with connections in the TIME-WAIT state Convert an IP address to a human-readable string in C

Listen on a TCP port with connections in the TIME-WAIT state

Content

1 Objective 2 Background 3 Scenario 4 Method 5 Notes 6 Methods to avoid o 6.1 Using SO_LINGER

Tested on

Debian (Lenny, Precise)

Objective

To begin listening on a TCP port whilst there are one or more connections to that port in the TIME-WAIT state, without waiting for the TIME-WAIT state to expire.

Background

When a TCP connection is closed then the socket from which the closure was initiated is not destroyed immediately. Instead it is placed in the TIME-WAIT state, where it is required to remain for at least twice the maximum segment lifetime (MSL) to allow any stray network packets to dissipate. During this period it is not permissible for another TCP connection to be established between the same pair of IP addresses and port numbers.

By itself this would be no great burden, but most implementations go further and (by default) do not allow a local address to be bound to a socket if there are any existing sockets using the same IP address and port number (including sockets in the TIME-WAIT state).

The practical effect of this behaviour is that when a network service terminates leaving connections in the TIME-WAIT state, it may not be possible to restart that service until the TIME-WAIT states

http://www.microhowto.info/howto/listen_on_a_tcp_port_with_connections_in_the_time_wait_state.html#idp26320







http://www.microhowto.info/howto/convert_an_ip_address_to_a_human_readable_string_in_c.html


http://www.microhowto.info/howto/listen_for_and_receive_udp_datagrams_in_c.html



have expired. The error reported when this happens is EADDRINUSE, which glibc renders as “Address already in use”.

Note that TIME-WAIT is not the only issue that could result in an EADDRINUSE error. For example, there could be orphaned child processes that were spawned by the network service but are still handling connections. Alternatively there could be another process listening to the port, perhaps because the previously running instance of the network service failed to die. You can check for these conditions by running the netstat command, without the -l option for connected sockets:

netstat -tn

and with the -l option for listening sockets:

netstat -tln

The maximum segment lifetime is implementation-dependent, but is typically in the range 30 seconds to 2 minutes. The minimum lifetime of the TIME-WAIT state is therefore typically in the range 1 to 4 minutes.

Scenario

Suppose you are writing a daemon that provides a TCP-based network service. Currently the following sequence of operations is used to open a server socket and listen on the required port:

int fd=socket(AF_INET,SOCK_STREAM,0);if (fd==-1) { die("%s",strerror(errno));}

if (bind(fd,(struct sockaddr*)&addr,sizeof(addr))==-1) { die("%s",strerror(errno));}

if (listen(fd,SOMAXCONN)==-1) { die("%s",strerror(errno));}

When the network service is restarted it sometimes fails with the error “Address already in use”. You wish to prevent this from happening.

Method

The error can be avoided by setting the SO_REUSEADDR socket option after the socket has been created but before calling bind:

int fd=socket(AF_INET,SOCK_STREAM,0);

if (fd==-1) { die("%s",strerror(errno));}

int reuseaddr=1;if (setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,&reuseaddr,sizeof(reuseaddr))==-1) { die("%s",strerror(errno));}

if (bind(fd,(struct sockaddr*)&addr,sizeof(addr))==-1) { die("%s",strerror(errno));}

if (listen(fd,SOMAXCONN)==-1) { die("%s",strerror(errno));}

SO_REUSEADDR allows a local address to be bound to a socket even if that address is already being used by a connection. This is helpful not only for dealing with connections in the TIME-WAIT state, but also any ESTABLISHED connections that are being handled by orphaned child processes.

It is considered safe for a TCP server socket to reuse a local address, because such sockets are used only to listen for connections and do not themselves act as endpoints. When new connections arrive they will need to be checked to ensure that they do not clash with existing ones, but this is something the network stack should be doing anyway: it makes no difference that the server process has been restarted.

In the absence of any good reason for leaving SO_REUSEADDR unset, it is considered good practice to set it as a matter of routine when creating TCP server sockets.

Notes

Depending on the implementation, it may be necessary for SO_REUSEADDR to be set both before and after the service is restarted.

SO_REUSEADDR does not allow two TCP sockets to listen to the same IP address and port number at the same time.

Methods to avoid

Using SO_LINGER

It is possible to prevent the TIME-WAIT state from being entered in the first place by setting the SO_LINGER option with a timeout of zero. This changes the behaviour of the close function: instead of performing a graceful shutdown, it aborts the connection by sending an immediate RST. Any unsent data is discarded and the socket immediately reverts to the CLOSED state.

Whilst this would meet the objective as stated, it is not a desirable solution because it circumvents the protection against stray network packets provided by the TIME-WAIT state. Since SO_REUSEADDR achieves the desired effect more safely, there is no justification for using SO_LINGER to avoid EADDRINUSE errors.

Convert an IP address to a human-readable string in C

Content

1 Objective 2 Scenario 3 Method 4 Variations o 4.1 Converting IPv4-mapped IPv6 addresses to plain IPv4 5 Alternatives o 5.1 Using inet_ntop o 5.2 Using inet_ntoa

Tested on

Debian (Lenny)

Ubuntu (Precise, Trusty)

Objective

To convert an IPv4 or IPv6 address to a human-readable string (for example 192.168.0.1 or 2001:db8::1)

Scenario

Suppose you have used the getpeername function to obtain the remote address to which a particular TCP socket is connected:

struct sockaddr_storage addr;socklen_t addr_len=sizeof(addr);int err=getpeername(sock_fd,(struct sockaddr*)&addr,&addr_len);if (err!=0) { die("failed to fetch remote address (errno=%d)",errno);}

The remote address has been written to a buffer called addr. This buffer is of type struct sockaddr_storage, but the address stored within it will be of type struct sockaddr_in or sockaddr_in6. The length of the address has been recorded in the variable addr_len. Note that:

addr is a socket address, so in addition to the IP address it contains information such as the address family and port number.

addr_len will probably not be equal to sizeof(struct sockaddr_storage) once the call to getpeername has completed.

You wish to convert the IP address contained within addr to a human-readable string.

http://www.microhowto.info/howto/convert_an_ip_address_to_a_human_readable_string_in_c.html#idp32416








Method

One way to perform the required conversion is to call the getnameinfo function. By default this attempts to convert the address into a domain name, however it can be instructed to produce a numeric address instead by setting the NI_NUMERICHOST flag:

#include <netdb.h>#include <sys/socket.h>#include <netinet/in.h>

// ...

char buffer[INET6_ADDRSTRLEN];int err=getnameinfo((struct sockaddr*)&addr,addr_len,buffer,sizeof(buffer), 0,0,NI_NUMERICHOST);if (err!=0) { die("failed to convert address to string (code=%d)",err);}printf("Remote address: %s\n",buffer);

The string buffer needs to be at least INET_ADDRSTRLEN bytes long for IPv4 and INET6_ADDRSTRLEN for IPv6. Since these constants are fixed (by POSIX) at 16 and 46 bytes respectively, INET6_ADDRSTRLEN can be presumed to suffice for either address family.

Variations

Converting IPv4-mapped IPv6 addresses to plain IPv4

If an IPv4 connection is made to an IPv6 socket then the local and remote network addresses will be represented as IPv4-mapped addresses. For example, the IPv4 address 192.168.0.1 would be represented by the IPv6 address ::ffff:192.168.0.1.

This format is readable, but it is probably not the best choice for presentation to the user. Since the connection was made using IPv4, the user could reasonably expect to see an IPv4 address. This can be achieved by converting the address from IPv6 to IPv4 before calling getnameinfo:

if (addr.ss_family==AF_INET6) { struct sockaddr_in6* addr6=(struct sockaddr_in6*)&addr; if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) { struct sockaddr_in addr4; memset(&addr4,0,sizeof(addr4)); addr4.sin_family=AF_INET; addr4.sin_port=addr6->sin6_port; memcpy(&addr4.sin_addr.s_addr,addr6->sin6_addr.s6_addr+12,sizeof(addr4.sin_addr.s_addr)); memcpy(&addr,&addr4,sizeof(addr4)); addr_len=sizeof(addr4); }}

The conversion is performed only if the address family is IPv6, and then only if the address is IPv4-mapped. The address buffer must be writable, and of the appropriate size and alignment to hold an IPv4 or IPv6 socket address. (That is the case here because the buffer is of type struct sockaddr_storage).

Alternatives

Using inet_ntop

An alternative method is to use the function inet_ntop. This is somewhat easier to use than getnameinfo if the IP address is not already embedded within a socket address, for example:

#include <arpa/inet.h>

// ...

char buffer[INET_ADDRSTRLEN];const char* result=inet_ntop(AF_INET,&ipv4addr,buffer,sizeof(buffer));if (result==0) { die("failed to convert address to string (errno=%d)",errno);}

IPv6 addresses can be handled by specifying AF_INET6 as the first argument, but (unlike getnameinfo) the result will not include the scope of a link-local or site-local address.

For both IPv4 and IPv6 the address passed in must be in network byte order (most significant byte first).

Using inet_ntoa

Another alternative is to use the function inet_ntoa. As with inet_ntop, the given IP address need not be embedded within a socket address:

#include <arpa/inet.h>

// ...

const char* result=inet_ntoa(ipv4addr);

Notable disadvantages of inet_ntoa are that it is not thread safe and provides no support for IPv6. However it does pre-date both getnameinfo and inet_ntop, so is more likely to be available on older systems.

Ifconfig: 10 Examples To Configure Network

Interface by RAMESH NATARAJAN on MARCH 9, 2009

This article is written by Lakshmanan G

Ifconfig command is used to configure network interfaces. ifconfig stands for interface

configurator. Ifconfig is widely used to initialize the network interface and to enable or

disable the interfaces.

In this article, let us review 10 common usages of the ifconfig command.

1. View Network Settings of an Ethernet Adapter

Ifconfig, when invoked with no arguments will display all the details of currently active

interfaces. If you give the interface name as an argument, the details of that specific

interface will be displayed.

# ifconfig eth0

eth0 Link encap:Ethernet HWaddr 00:2D:32:3E:39:3B

inet addr:192.168.2.2 Bcast:192.168.2.255 Mask:255.255.255.0

inet6 addr: fe80::21d:92ff:fede:499b/64 Scope:Link

UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

RX packets:977839669 errors:0 dropped:1990 overruns:0 frame:0

TX packets:1116825094 errors:8 dropped:0 overruns:0 carrier:0

collisions:0 txqueuelen:1000

RX bytes:2694625909 (2.5 GiB) TX bytes:4106931617 (3.8 GiB)

Interrupt:185 Base address:0xdc00

2. Display Details of All interfaces Including Disabled Interfaces

# ifconfig -a

3. Disable an Interface

# ifconfig eth0 down

4. Enable an Interface

# ifconfig eth0 up

Or # ifup eth0

5. Assign ip-address to an Interface

Assign 192.168.2.2 as the IP address for the interface eth0.

# ifconfig eth0 192.168.2.2

Change Subnet mask of the interface eth0.

# ifconfig eth0 netmask 255.255.255.0

Change Broadcast address of the interface eth0.

# ifconfig eth0 broadcast 192.168.2.255

Assign ip-address, netmask and broadcast at the same time to interface eth0.

# ifconfig eth0 192.168.2.2 netmask 255.255.255.0 broadcast 192.168.2.255

6. Change MTU

This will change the Maximum transmission unit (MTU) to XX. MTU is the maximum

number of octets the interface is able to handle in one transaction. For Ethernet the

Maximum transmission unit by default is 1500.

# ifconfig eth0 mtu XX

7. Promiscuous mode

By default when a network card receives a packet, it checks whether the packet

belongs to itself. If not, the interface card normally drops the packet. But in

promiscuous mode, the card doesn’t drop the packet. Instead, it will accept all the

packets which flows through the network card.

Superuser privilege is required to set an interface in promiscuous mode. Most network

monitor tools use the promiscuous mode to capture the packets and to analyze the

network traffic.

Following will put the interface in promiscuous mode.

# ifconfig eth0 promisc

Following will put the interface in normal mode.

# ifconfig eth0 -promisc

8 How to Add New Alias to Network Interface

The ifconfig utility allows you to configure additional network interfaces

using alias feature. To add alias network interface of eth0, use the following command.

Please note that the alias network address must be in the same subnet. For example, if

your eth0 network ip address is 172.16.25.125, then the alias ip address could

be 172.16.25.127.

[root@tecmint ~]# ifconfig eth0:0 172.16.25.127

Next, verify the newly created alias network interface address, by using “ifconfig eth0:0”

command.

[root@tecmint ~]# ifconfig eth0:0

eth0:0 Link encap:Ethernet HWaddr 00:01:6C:99:14:68

inet addr:172.16.25.123 Bcast:172.16.25.63 Mask:255.255.255.240

UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1

Interrupt:17

9 How to Remove Alias to Network Interface

If you no longer require an alias network interface or you incorrectly configured it, you

can remove it by using the following command.

[root@tecmint ~]# ifconfig eth0:0 down

10 How to Change the MAC address of Network Interface

To change the MAC (Media Access Control) address of an eth0 network interface, use

the following command with argument “hw ether“. For example, see below.

[root@tecmint ~]# ifconfig eth0 hw ether AA:BB:CC:DD:EE:FF

These are the most useful commands for configuring network interfaces in Linux, for

more information and usage of ifconfig command use the manpages like “man ifconfig”

at the terminal. Check out some other networking utilities below.

Configure network card in promiscuous mode

When running in promiscuous mode, all traffic the network card receives can be read. This configuration is useful for us to do network

monitoring, like for a network intrusion detection system.

How can I config my network card in promiscuous mode?

You can do this easily by one command. It works on both RedHat and Debian based distributions. Below is an example:

http://en.wikipedia.org/wiki/Promiscuous_mode

root@db1:~# ifconfig eth1 promisc

[2685638.719679] device eth1 entered promiscuous mode

root@db1:~# ifconfig eth1 -promisc

root@db1:~# dmesg | tail -1

[2685655.668037] device eth1 left promiscuous mode

Then, how can we setup the promiscuous mode in configuration files, so that it takes effect when system boots? As the

configuration varies by distribution, here we raise two examples.

Setup promiscuous mode on Redhat / CentOS

To configure a network card in promiscuous mode, you need to put the line PROMISC=yes in its configuration

file /etc/sysconfig/network-scripts/ifcfg-ethX.

BOOTPROTO=static

DEVICE=ethX

ONBOOT=yes

TYPE=Ethernet

PROMISC=yes

USERCTL=no

Don’t forget to replace ethX with the device you are actually using.

Setup promiscuous mode on Ubuntu / Debian

Below is part of an example /etc/network/interfaces file:

auto eth0

iface eth0 inet manual

up ifconfig $IFACE 192.168.1.100 up

up ip link set $IFACE promisc on

down ip link set $IFACE promisc off

down ifconfig $IFACE down

TCPDUMP INFO

When it comes to tcpdump most admins fall into two categories; they either know tcpdump and all of its flags like the back of their hand, or they kind of know it but need to use a reference for anything outside of the basic usage. The reason for this is because tcpdump is a pretty advanced command and it is pretty easy to get into the depths of how networking works when using it.

For today's article I wanted to create a quick but practical reference for tcpdump. I will cover the basics as well as some of the more advanced usage. I am sure I will most likely leave out some cool commands so if you want to add anything please feel free to drop it into the comments section.

Before we get too far into the weeds, it is probably best to cover what tcpdump is used for. The command tcpdump is used to create "dumps" or "traces" of network traffic. It allows you to look at what is happening on the network and really can be useful for troubleshooting many types of issues including issues that aren't due to network communications. Outside of network issues I use tcpdump to troubleshoot application issues all the time; if you ever have two applications that don't seem to be working well together, tcpdump is a great way to see what is happening. This is especially true if the traffic is not encrypted as tcpdump can be used to capture and read packet data as well.

The Basics

The first thing to cover with tcpdump is what flags to use. In this section I am going to cover the most basic flags that can be used in most situations.

Don't translate hostnames, ports, etc

# tcpdump -n

By default tcpdump will try to lookup and translate hostnames and ports.

# tcpdump

tcpdump: verbose output suppressed, use -v or -vv for full protocol decode

listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes

16:15:05.051896 IP blog.ssh > 10.0.3.1.32855: Flags [P.], seq 2546456553:2546456749, ack 1824683693, win 355, options [nop,nop,TS val 620879437 ecr 620879348], length 196

You can turn this off by using the -n flag. Personally, I always use this flag as the hostname and port translation usually annoys me because I tend to work from IP addresses rather than hostnames. However, knowing that you can have tcpdump translate or not translate these are useful; as there are times where knowing what server the source traffic is coming from is important.

# tcpdump -n



16:23:47.934665 IP 10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], seq 2546457621:2546457817, ack 1824684201, win 355, options [nop,nop,TS val 621010158 ecr 621010055], length 196

Adding verbosity

# tcpdump -v

By adding a simple -v the output will start including a bit more such as the ttl, total length and options in the IP packets.

# tcpdump




tcpdump has three verbosity levels, you can add more verbosity by adding additional v's to the command line flags. In general whenever I am using tcpdump I tend to use the highest verbosity, as I like having everything visible just in case I need it.

# tcpdump -vvv -c 1

tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes

16:36:13.873456 IP (tos 0x10, ttl 64, id 121, offset 0, flags [DF], proto TCP (6), length 184)

blog.ssh > 10.0.3.1.32855: Flags [P.], cksum 0x1ba1 (incorrect -> 0x0dfd), seq 2546458841:2546458973, ack 1824684869, win 355, options [nop,nop,TS val 621196643 ecr 621196379], length 132

Specifying an Interface

# tcpdump -i eth0

By default when you run tcpdump without specifying an interface it will choose the lowest numbered interface, usually this is eth0 however that is not guaranteed for all systems.

# tcpdump




You can specify the interface by using the -i flag followed by the interface name. On most linux systems a special interface name of any can be used to tell tcpdump to listen on all interfaces, I find this extremely useful when troubleshooting servers with multiple interfaces. This is especially true when there are routing issues involved.

# tcpdump -i any


listening on any, link-type LINUX_SLL (Linux cooked), capture size 65535 bytes


Writing to a file

# tcpdump -w /path/to/file

When you just run tcpdump by itself it will output to your screen.

# tcpdump




There are many times where you may want to save the tcpdump data to a file, the easiest way to do this is to use the -w flag. This is useful for situations where you may need to save the network dump to review later. One benefit to saving the data to a file is that you can read the dump file multiple times and apply other flags or filters (which we will cover below) to that snapshot of network traffic.

# tcpdump -w /var/tmp/tcpdata.pcap

tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes

1 packet captured

2 packets received by filter

0 packets dropped by kernel

By default the data is buffered and will not usually be written to the file until you CTRL+C out of the running tcpdump command.

Reading from a file

# tcpdump -r /path/to/file

Once you save the output to a file you will inherently need to read that file. To do this you can simply use the -r flag followed by the path to the file.

# tcpdump -r /var/tmp/tcpdata.pcap

reading from file /var/tmp/tcpdata.pcap, link-type EN10MB (Ethernet)


As a quick note, if you are more familiar with tools such as wireshark you can read files saved by tcpdump with most network troubleshooting tools like wireshark.

https://www.wireshark.org/

Specifying the capture size of each packet

# tcpdump -s 100

By default most newer implementations of tcpdump will capture 65535 bytes, however in some situations you may not want to capture the default packet length. You can use -s to specify the "snaplen" or "snapshot length" that you want tcpdump to capture.

Specifying the number of packets to capture

# tcpdump -c 10

When you run tcpdump by itself it will keep running until you hit CTRL+C to quit.

# tcpdump host google.com



^C

0 packets captured



You can tell tcpdump to stop capturing after a certain number of packets by using the -c flag followed by the number of packets to capture. This is pretty useful for situations where you may not want tcpdump to spew output to your screen so fast you can't read it, however generally this is more useful when you are using filters to grab specific traffic.

Pulling the basics together

# tcpdump -nvvv -i any -c 100 -s 100

All of the basic flags that were covered above can also be combined to allow you to specify exactly what you want tcpdump to provide.

# tcpdump -w /var/tmp/tcpdata.pcap -i any -c 10 -vvv

tcpdump: listening on any, link-type LINUX_SLL (Linux cooked), capture size 65535 bytes

10 packets captured



# tcpdump -r /var/tmp/tcpdata.pcap -nvvv -c 5

reading from file /var/tmp/tcpdata.pcap, link-type LINUX_SLL (Linux cooked)


10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1b51 (incorrect -> 0x72bc), seq 2547781277:2547781329, ack 1824703573, win 355, options [nop,nop,TS val 622081791 ecr 622081775], length 52


10.0.3.1.32855 > 10.0.3.246.22: Flags [.], cksum 0x1b1d (incorrect -> 0x4950), seq 1, ack 52, win 541, options [nop,nop,TS val 622081791 ecr 622081791], length 0


10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1b91 (incorrect -> 0x98c3), seq 52:168, ack 1, win 355, options [nop,nop,TS val 622081792 ecr 622081791], length 116


10.0.3.1.32855 > 10.0.3.246.22: Flags [.], cksum 0x1b1d (incorrect -> 0x48da), seq 1, ack 168, win 541, options [nop,nop,TS val 622081792 ecr 622081792], length 0


10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1b51 (incorrect -> 0xc3ba), seq 168:220, ack 1, win 355, options [nop,nop,TS val 622082040 ecr 622081792], length 52

Filters

Now that we have covered some of the basic flags we should cover filtering. tcpdump has the ability to filter the capture or output based on a variety of expressions, in this article I am only going to cover a few quick examples to give you an idea of the syntax. For a full list you can check out the pcap-filter section of the tcpdump manpage.

Searching for traffic to and from a specific host

# tcpdump -nvvv -i any -c 3 host 10.0.3.1

The above command will run a tcpdump and send the output to the screen like we saw with the flags before, however it will only do so if the source or destination IP address is 10.0.3.1. Essentially by adding host 10.0.3.1 we are asking tcpdump to filter out anything that is not to or from 10.0.3.1.

http://www.tcpdump.org/manpages/pcap-filter.7.html

# tcpdump -nvvv -i any -c 3 host 10.0.3.1



10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1ba1 (incorrect -> 0x9f75), seq 2547785621:2547785753, ack 1824705637, win 355, options [nop,nop,TS val 622366941 ecr 622366923], length 132


10.0.3.1.32855 > 10.0.3.246.22: Flags [.], cksum 0x1b1d (incorrect -> 0x7c34), seq 1, ack 132, win 540, options [nop,nop,TS val 622366941 ecr 622366941], length 0


10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1d71 (incorrect -> 0x3443), seq 132:728, ack 1, win 355, options [nop,nop,TS val 622366943 ecr 622366941], length 596

Only show traffic where the source is a specific host

# tcpdump -nvvv -i any -c 3 src host 10.0.3.1

Where the previous example showed traffic to and from 10.0.3.1 the above command will only show traffic where the source of the packet is 10.0.3.1. This is accomplished by adding src in front of the host filter. This is an additional filter that tells tcpdump to look for a specific "source". This can be reversed by using the dst filter, which specifies the "destination".

# tcpdump -nvvv -i any -c 3 src host 10.0.3.1





10.0.3.1.32855 > 10.0.3.246.22: Flags [.], cksum 0x1b1d (incorrect -> 0x15c5), seq 0, ack 325, win 538, options [nop,nop,TS val 622411223 ecr 622411223], length 0



# tcpdump -nvvv -i any -c 3 dst host 10.0.3.1



10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1ba1 (incorrect -> 0x586d), seq 2547789725:2547789857, ack 1824707577, win 355, options [nop,nop,TS val 622447491 ecr 622447471], length 132


10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1c71 (incorrect -> 0x462e), seq 132:472, ack 1, win 355, options [nop,nop,TS val 622447491 ecr 622447491], length 340


10.0.3.246.22 > 10.0.3.1.32855: Flags [P.], cksum 0x1c51 (incorrect -> 0xf469), seq 472:780, ack 1, win 355, options [nop,nop,TS val 622447491 ecr 622447491], length 308

Filtering source and destination ports

# tcpdump -nvvv -i any -c 3 port 22 and port 60738

You can add some rather complicated filtering statements with tcpdump when you start using operators like and. You can think of this as something similar to if statements. In this example we are using the and operator to tell tcpdump to only output packets that have both ports 22 and 60738. This allows us to narrow down the packets to a specific session, this can be extremely useful when troubleshooting network issues.

# tcpdump -nvvv -i any -c 3 port 22 and port 60738



10.0.3.1.60738 > 10.0.3.246.22: Flags [P.], cksum 0x1b51 (incorrect -> 0x5b3c), seq 917414532:917414584, ack 1550997318, win 353, options [nop,nop,TS val 622541691 ecr 622538903], length 52


10.0.3.246.22 > 10.0.3.1.60738: Flags [P.], cksum 0x1ba1 (incorrect -> 0xb0b1), seq 1:133, ack 52, win 355, options [nop,nop,TS val 622541692 ecr 622541691], length 132


10.0.3.1.60738 > 10.0.3.246.22: Flags [.], cksum 0x1b1d (incorrect -> 0x1e3b), seq 52, ack 133, win 353, options [nop,nop,TS val 622541692 ecr 622541692], length 0

You can express the and operator in a couple of different ways, you can use and or &&. Personally, I tend to use them both; it is important to remember that if you are going to use && that you should enclose the filter expression with single or double quotes. In BASH you can use && to run one command and if successful run a second. In general it is best to simply wrap filter expressions in quotes; this will prevent any unexpected results as filters can have quite a few special characters.

# tcpdump -nvvv -i any -c 3 'port 22 && port 60738'



10.0.3.1.60738 > 10.0.3.246.22: Flags [P.], cksum 0x1b41 (incorrect -> 0x776c), seq 917414636:917414672, ack 1550997518, win 353, options [nop,nop,TS val 622547190 ecr 622541776], length 36


10.0.3.246.22 > 10.0.3.1.60738: Flags [P.], cksum 0x1b61 (incorrect -> 0xaf2d), seq 1:69, ack 36, win 355, options [nop,nop,TS val 622547191 ecr 622547190], length 68


10.0.3.1.60738 > 10.0.3.246.22: Flags [.], cksum 0x1b1d (incorrect -> 0xf264), seq 36, ack 69, win 353, options [nop,nop,TS val 622547191 ecr 622547191], length 0

Searching for traffic on one port or another

# tcpdump -nvvv -i any -c 20 'port 80 or port 443'

You can also use the or or || operator to filter tcpdump results. In this example we are using the or operator to capture traffic to and from port 80 or port 443. This example is especially useful as webservers generally have two ports open, 80 for http traffic and 443 for https.

# tcpdump -nvvv -i any -c 20 'port 80 or port 443'



10.0.3.1.50524 > 10.0.3.246.443: Flags [S], cksum 0x1b25 (incorrect -> 0x8611), seq 3836995553, win 29200, options [mss 1460,sackOK,TS val 622820379 ecr 0,nop,wscale 7], length 0


10.0.3.246.443 > 10.0.3.1.50524: Flags [R.], cksum 0x012c (correct), seq 0, ack 3836995554, win 0, length 0


10.0.3.1.60374 > 10.0.3.246.80: Flags [P.], cksum 0x1cc4 (incorrect -> 0x3a4e), seq 580573019:580573442, ack 1982754038, win 237, options [nop,nop,TS val 622821354 ecr 622815632], length 423


10.0.3.246.80 > 10.0.3.1.60374: Flags [.], cksum 0x1b1d (incorrect -> 0x45d7), seq 1, ack 423, win 243, options [nop,nop,TS val 622821355 ecr 622821354], length 0


10.0.3.246.80 > 10.0.3.1.60374: Flags [P.], cksum 0x1bda (incorrect -> 0x855c), seq 1:190, ack 423, win 243, options [nop,nop,TS val 622821355 ecr 622821354], length 189



Searching for traffic on two specific ports and from a specific host

# tcpdump -nvvv -i any -c 20 '(port 80 or port 443) and host 10.0.3.169'

While the previous example is great for looking at issues for a multiport protocol; what if this is a very high traffic webserver? The output from tcpdump may get a bit confusing. We can narrow down the results even further by adding a host filter. To do this while maintaining our or expression we can simply wrap the or statement in parentheses.

# tcpdump -nvvv -i any -c 20 '(port 80 or port 443) and host 10.0.3.169'



10.0.3.169.33786 > 10.0.3.246.443: Flags [S], cksum 0x1bcd (incorrect -> 0x0d96), seq 4173164403, win 29200, options [mss 1460,sackOK,TS val 623024562 ecr 0,nop,wscale 7], length 0


10.0.3.246.443 > 10.0.3.169.33786: Flags [R.], cksum 0xa64a (correct), seq 0, ack 4173164404, win 0, length 0


10.0.3.169.35629 > 10.0.3.246.80: Flags [S], cksum 0x1bcd (incorrect -> 0xdf7c), seq 1068257453, win 29200, options [mss 1460,sackOK,TS val 623024603 ecr 0,nop,wscale 7], length 0


10.0.3.246.80 > 10.0.3.169.35629: Flags [S.], cksum 0x1bcd (incorrect -> 0xed80), seq 2992472447, ack 1068257454, win 28960, options [mss 1460,sackOK,TS val 623024603 ecr 623024603,nop,wscale 7], length 0


10.0.3.169.35629 > 10.0.3.246.80: Flags [.], cksum 0x1bc5 (incorrect -> 0x8c87), seq 1, ack 1, win 229, options [nop,nop,TS val 623024604 ecr 623024603], length 0

You can use the parenthesis multiple times in a single filter, for example the below command will filter the capture to only packets that are to or from port 80 or port 443 and from hosts 10.0.3.169 and 10.0.3.1 if they are destined for 10.0.3.246.

# tcpdump -nvvv -i any -c 20 '((port 80 or port 443) and (host 10.0.3.169 or host 10.0.3.1)) and dst host 10.0.3.246'



10.0.3.1.35407 > 10.0.3.246.80: Flags [S], cksum 0x1b25 (incorrect -> 0x4890), seq 3026316656, win 29200, options [mss 1460,sackOK,TS val 623255761 ecr 0,nop,wscale 7], length 0




10.0.3.1.35407 > 10.0.3.246.80: Flags [P.], cksum 0x1cc4 (incorrect -> 0x10c2), seq 0:423, ack 1, win 229, options [nop,nop,TS val 623255763 ecr 623255762], length 423


10.0.3.1.35407 > 10.0.3.246.80: Flags [.], cksum 0x1b1d (incorrect -> 0x31e6), seq 423, ack 190, win 237, options [nop,nop,TS val 623255763 ecr 623255763], length 0

Understanding the output

Capturing network traffic with tcpdump is hard enough with all of the options, but once you have that data you have to decipher it. In this section we are going to cover how to identify the source/destination IP, source/destination Port and the type of packet for the TCP protocol. While these are all very basic items they are far from the extent of what you can identify from tcpdump, however this article is meant to be quick and dirty so we will keep it to the basics. For more information on tcpdump and what is being listed I suggest checking out the manpages.

Identifying the source and destination

Identifying the source and destination addresses and ports is actually fairly easy.

10.0.3.246.56894 > 192.168.0.92.22: Flags [S], cksum 0xcf28 (incorrect -> 0x0388), seq 682725222, win 29200, options [mss 1460,sackOK,TS val 619989005 ecr 0,nop,wscale 7], length 0

Given the above output we can see that the source ip is 10.0.3.246, the source port is 56894, and the destination ip is 192.168.0.92 with a destination port of 22. This is pretty easy to identify once you understand the format of tcpdump. If you haven't guessed the format yet you can break it down as follows: src-ip.src-port > dest-ip.dest-port: Flags [S]. The source is in front of the > and the destination is behind. You can think of the > as an arrow pointing to the destination.

Identifying the type of packet


http://www.tcpdump.org/manpages/

From the sample above we can tell that the packet is a single SYN packet. We can identify this by the Flags [S] section of the tcpdump output; different types of packets have different types of flags. Without going too deep into what types of packets exist within TCP you can use the below as a cheat sheet for identifying packet types.

[S] - SYN (Start Connection) [.] - No Flag Set [P] - PSH (Push Data) [F] - FIN (Finish Connection) [R] - RST (Reset Connection)

Depending on the version and output of tcpdump you may also see flags such as [S.]; this is used to indicate a SYN-ACK packet.

An unhealthy example




10.0.3.246.56894 > 192.168.0.92.22: Flags [S], cksum 0xcf28 (incorrect -> 0x028e), seq 682725222, win 29200, options [mss 1460,sackOK,TS val 619989255 ecr 0,nop,wscale 7], length 0


10.0.3.246.56894 > 192.168.0.92.22: Flags [S], cksum 0xcf28 (incorrect -> 0x009a), seq 682725222, win 29200, options [mss 1460,sackOK,TS val 619989755 ecr 0,nop,wscale 7], length 0

The above sampling shows an example of an unhealthy exchange, and by unhealthy exchange for this example that means no exchange. In the above sample we can see that 10.0.3.246 is sending a SYN packet to host 192.168.0.92; however, we never see a response from host 192.168.0.92.

A healthy example


10.0.3.246.34908 > 192.168.0.110.22: Flags [S], cksum 0xcf3a (incorrect -> 0xc838), seq 1943877315, win 29200, options [mss 1460,sackOK,TS val 620029603 ecr 0,nop,wscale 7], length 0


192.168.0.110.22 > 10.0.3.246.34908: Flags [S.], cksum 0x594a (correct), seq 4001145915, ack 1943877316, win 5792, options [mss 1460,sackOK,TS val 18495104 ecr 620029603,nop,wscale 2], length 0


10.0.3.246.34908 > 192.168.0.110.22: Flags [.], cksum 0xcf32 (incorrect -> 0x9dcc), ack 1, win 229, options [nop,nop,TS val 620029603 ecr 18495104], length 0

A healthy example would look like the above; in the above we can see a standard TCP 3-way handshake. The first packet above is a SYN packet from host 10.0.3.246 to host 192.168.0.110, the second packet is a SYN-ACK from host 192.168.0.110 acknowledging the SYN. The final packet is an ACK or rather a SYN-ACK-ACK from host 10.0.3.246 acknowledging that it has received the SYN-ACK. From this point on there is an established TCP/IP connection.

Packet Inspection

Printing packet data in Hex and ASCII

# tcpdump -nvvv -i any -c 1 -XX 'port 80 and host 10.0.3.1'

A common method of troubleshooting application issues over the network is using tcpdump with the -XX flag to print the packet data in hex and ascii. This is a pretty helpful command; it allows you to look at the source, destination, type of packet and the packet itself. However, I am not a fan of this output. I think it is a bit hard to read.

# tcpdump -nvvv -i any -c 1 -XX 'port 80 and host 10.0.3.1'



10.0.3.1.45732 > 10.0.3.246.80: Flags [P.], cksum 0x1ccc (incorrect -> 0x2ce8), seq 3920159713:3920160144, ack 969855140, win 245, options [nop,nop,TS val 624122099 ecr 624117334], length 431

0x0000: 0000 0001 0006 fe0a e2d1 8785 0000 0800 ................

0x0010: 4500 01e3 d429 4000 4006 49f5 0a00 0301 E....)@.@.I.....

0x0020: 0a00 03f6 b2a4 0050 e9a8 e3e1 39ce d0a4 .......P....9...

0x0030: 8018 00f5 1ccc 0000 0101 080a 2533 58f3 ............%3X.

0x0040: 2533 4656 4745 5420 2f73 6f6d 6570 6167 %3FVGET./somepag

0x0050: 6520 4854 5450 2f31 2e31 0d0a 486f 7374 e.HTTP/1.1..Host

0x0060: 3a20 3130 2e30 2e33 2e32 3436 0d0a 436f :.10.0.3.246..Co

0x0070: 6e6e 6563 7469 6f6e 3a20 6b65 6570 2d61 nnection:.keep-a

0x0080: 6c69 7665 0d0a 4361 6368 652d 436f 6e74 live..Cache-Cont

0x0090: 726f 6c3a 206d 6178 2d61 6765 3d30 0d0a rol:.max-age=0..

0x00a0: 4163 6365 7074 3a20 7465 7874 2f68 746d Accept:.text/htm

0x00b0: 6c2c 6170 706c 6963 6174 696f 6e2f 7868 l,application/xh

0x00c0: 746d 6c2b 786d 6c2c 6170 706c 6963 6174 tml+xml,applicat

0x00d0: 696f 6e2f 786d 6c3b 713d 302e 392c 696d ion/xml;q=0.9,im

0x00e0: 6167 652f 7765 6270 2c2a 2f2a 3b71 3d30 age/webp,*/*;q=0

0x00f0: 2e38 0d0a 5573 6572 2d41 6765 6e74 3a20 .8..User-Agent:.

0x0100: 4d6f 7a69 6c6c 612f 352e 3020 284d 6163 Mozilla/5.0.(Mac

0x0110: 696e 746f 7368 3b20 496e 7465 6c20 4d61 intosh;.Intel.Ma

0x0120: 6320 4f53 2058 2031 305f 395f 3529 2041 c.OS.X.10_9_5).A

0x0130: 7070 6c65 5765 624b 6974 2f35 3337 2e33 ppleWebKit/537.3

0x0140: 3620 284b 4854 4d4c 2c20 6c69 6b65 2047 6.(KHTML,.like.G

0x0150: 6563 6b6f 2920 4368 726f 6d65 2f33 382e ecko).Chrome/38.

0x0160: 302e 3231 3235 2e31 3031 2053 6166 6172 0.2125.101.Safar

0x0170: 692f 3533 372e 3336 0d0a 4163 6365 7074 i/537.36..Accept

0x0180: 2d45 6e63 6f64 696e 673a 2067 7a69 702c -Encoding:.gzip,

0x0190: 6465 666c 6174 652c 7364 6368 0d0a 4163 deflate,sdch..Ac

0x01a0: 6365 7074 2d4c 616e 6775 6167 653a 2065 cept-Language:.e

0x01b0: 6e2d 5553 2c65 6e3b 713d 302e 380d 0a49 n-US,en;q=0.8..I

0x01c0: 662d 4d6f 6469 6669 6564 2d53 696e 6365 f-Modified-Since

0x01d0: 3a20 5375 6e2c 2031 3220 4f63 7420 3230 :.Sun,.12.Oct.20

0x01e0: 3134 2031 393a 3430 3a32 3020 474d 540d 14.19:40:20.GMT.

0x01f0: 0a0d 0a ...

Printing packet data in ASCII only

# tcpdump -nvvv -i any -c 1 -A 'port 80 and host 10.0.3.1'

I tend to prefer to print only the ASCII data; this helps me to quickly identify what is being sent and what is correct or not correct about the packet's data. To print packet data in only the ascii format you can use the -A flag.

# tcpdump -nvvv -i any -c 1 -A 'port 80 and host 10.0.3.1'



10.0.3.1.46172 > 10.0.3.246.80: Flags [P.], cksum 0x1c7f (incorrect -> 0xead1), seq 1552520173:1552520527, ack 428165415, win 237, options [nop,nop,TS val 624251177 ecr 624247749], length 354

E.....@[email protected]

...

....\.P\.....I'...........

%5Q)%5C.GET /newpage HTTP/1.1

Host: 10.0.3.246

Connection: keep-alive

Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8

User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.101 Safari/537.36

Accept-Encoding: gzip,deflate,sdch

Accept-Language: en-US,en;q=0.8

As you can see from the output above we have successfully captured an http GET request. Being able to print the packet data in a human readable format is very useful when troubleshooting application issues where the traffic is not encrypted. If you are troubleshooting encrypted traffic then printing packet data is not very useful. However, if you have the certificates in use you could use commands such as ssldump or even wireshark.

Non-TCP Traffic

While the majority of this article covered TCP based traffic, tcpdump can capture much more than TCP. It can also be used to capture ICMP, UDP, and ARP packets to name a few. Below are a few quick examples of non-TCP packets captured by tcpdump.

ICMP packets

# tcpdump -nvvv -i any -c 2 icmp


20:11:24.627824 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 84)

10.0.3.169 > 10.0.3.246: ICMP echo request, id 15683, seq 1, length 64

20:11:24.627926 IP (tos 0x0, ttl 64, id 31312, offset 0, flags [none], proto ICMP (1), length 84)

10.0.3.246 > 10.0.3.169: ICMP echo reply, id 15683, seq 1, length 64

UDP packets

# tcpdump -nvvv -i any -c 2 udp


20:12:41.726355 IP (tos 0xc0, ttl 64, id 0, offset 0, flags [DF], proto UDP (17), length 76)

10.0.3.246.123 > 198.55.111.50.123: [bad udp cksum 0x43a9 -> 0x7043!] NTPv4, length 48

Client, Leap indicator: clock unsynchronized (192), Stratum 2 (secondary reference), poll 6 (64s), precision -22

Root Delay: 0.085678, Root dispersion: 57.141830, Reference-ID: 199.102.46.75

Reference Timestamp: 3622133515.811991035 (2014/10/12 20:11:55)

Originator Timestamp: 3622133553.828614115 (2014/10/12 20:12:33)

Receive Timestamp: 3622133496.748308420 (2014/10/12 20:11:36)

Transmit Timestamp: 3622133561.726278364 (2014/10/12 20:12:41)

Originator - Receive Timestamp: -57.080305658

Originator - Transmit Timestamp: +7.897664248

20:12:41.748948 IP (tos 0x0, ttl 54, id 9285, offset 0, flags [none], proto UDP (17), length 76)

198.55.111.50.123 > 10.0.3.246.123: [udp sum ok] NTPv4, length 48

Server, Leap indicator: (0), Stratum 3 (secondary reference), poll 6 (64s), precision -20

Root Delay: 0.054077, Root dispersion: 0.058944, Reference-ID: 216.229.0.50

Reference Timestamp: 3622132887.136984840 (2014/10/12 20:01:27)

Originator Timestamp: 3622133561.726278364 (2014/10/12 20:12:41)

Receive Timestamp: 3622133618.830113530 (2014/10/12 20:13:38)

Transmit Timestamp: 3622133618.830129086 (2014/10/12 20:13:38)

Originator - Receive Timestamp: +57.103835195

Originator - Transmit Timestamp: +57.103850722

Socket options SO_REUSEADDR and SO_REUSEPORT

Welcome to the wonderful world of portability... or rather the lack of it. Before we start analyzing these two options in detail and take a deeper look how different operating systems handle them, it should be noted that the BSD socket implementation is the mother of all socket implementations. Basically all other systems copied the BSD socket implementation at some point of time (or at least its interfaces) and then started evolving it on their own. Of course the BSD socket implementation was evolved as well at the same time and thus systems that copied it later got features that were lacking in systems that copied it earlier. Understanding the BSD socket implementation is the key to understanding all other socket implementations, so you should read about it even if you don't care to ever write code for a BSD system.

There are a couple of basics you should know before we look at these two options. A TCP/UDP connection is identified by a tuple of five values:

{<protocol>, <src addr>, <src port>, <dest addr>, <dest port>} Any unique combination of these values identifies a connection. As a result, no two connections can have the same five values, otherwise the system would not be able to distinguish these connections any longer.

The protocol of a socket is set when a socket is created with the socket() function. The source address and port are set with the bind() function. The destination address and port are set with the connect() function. Since UDP is a connectionless protocol, UDP sockets can be used without connecting them. Yet it is allowed to connect them and in some cases very advantageous for your code and general application design. In connectionless mode, UDP sockets that were not explicitly bound when data is sent over them for the first time are usually automatically bound by the system, as an unbound UDP socket cannot receive any (reply) data. Same is true for an unbound TCP socket, it is automatically bound before it will be connected. If you explicitly bind a socket, it is possible to bind it to port 0, which means "any port". Since a socket cannot really be bound to all existing ports, the system will have to choose a specific port itself in that case (usually from a predefined, OS specific range of source ports). A similar wildcard exists for the source address, which can be "any address" (0.0.0.0 in case of IPv4 and :: in case of IPv6). Unlike in case of ports, a socket can really be bound to "any address" which means "all source IP addresses of all local interfaces". If the socket is connected later on, the system has to choose a specific source IP address, since a socket cannot be connected and at the same time be bound to any local IP address. Depending on the destination address and the content of the routing table, the system will pick an appropriate source address and replace the "any" binding with a binding to the chosen source IP address. By default, no two sockets can be bound to the same combination of source address and source port. As long as the source port is different, the source address is actually irrelevant. Binding socketA to A:X and socketB to B:Y, where A and B are addresses and X and Y are ports, is always possible as long as X != Y holds true.
However, even if X == Y, the binding is still possible as long as A != B holds true. E.g. socketA belongs to a FTP server program and is bound to 192.168.0.1:21 and socketB belongs to another FTP server program and is bound to 10.0.0.1:21, both bindings will succeed. Keep in mind, though, that a socket may be locally bound to "any address". If a socket is bound to 0.0.0.0:21, it is bound to all existing local addresses at the same time and in that case no other socket can be bound to port 21, regardless which specific IP address it tries to bind to, as 0.0.0.0 conflicts with all existing local IP addresses. Anything said so far is pretty much equal for all major operating systems. Things start to get OS specific when address reuse comes into play. We start with BSD, since as I said above, it is the mother of all socket implementations.

BSD SO_REUSEADDR (BSD is not Linux in this article's context)

If SO_REUSEADDR is enabled on a socket prior to binding it, the socket can be successfully bound unless there is a conflict with another socket bound to exactly the same combination of source address and port. Now you may wonder how is that any different than before? The keyword is "exactly". SO_REUSEADDR mainly changes the way how wildcard addresses ("any IP address") are treated when searching for conflicts. Without SO_REUSEADDR, binding socketA to 0.0.0.0:21 and then binding socketB to 192.168.0.1:21 will fail (with error EADDRINUSE), since 0.0.0.0 means "any local IP address", thus all local IP addresses are considered in use by this socket and this includes 192.168.0.1, too. With SO_REUSEADDR it will succeed, since 0.0.0.0 and 192.168.0.1 are not exactly the same address, one is a wildcard for all local addresses and the other one is a very specific local address. Note that the statement above is true regardless in which order socketA and socketB are bound; without SO_REUSEADDR it will always fail, with SO_REUSEADDR it will always succeed. To give you a better overview, let's make a table here and list all possible combinations:

SO_REUSEADDR socketA socketB Result

---------------------------------------------------------------------

ON/OFF 192.168.0.1:21 192.168.0.1:21 Error (EADDRINUSE)

ON/OFF 192.168.0.1:21 10.0.0.1:21 OK

ON/OFF 10.0.0.1:21 192.168.0.1:21 OK

OFF 0.0.0.0:21 192.168.1.0:21 Error (EADDRINUSE)

OFF 192.168.1.0:21 0.0.0.0:21 Error (EADDRINUSE)

ON 0.0.0.0:21 192.168.1.0:21 OK

ON 192.168.1.0:21 0.0.0.0:21 OK

ON/OFF 0.0.0.0:21 0.0.0.0:21 Error (EADDRINUSE)

The table above assumes that socketA has already been successfully bound to the address given for socketA, then socketB is created, either gets SO_REUSEADDR set or not, and finally is bound to the address given for socketB. Result is the result of the bind operation for socketB. If the first column says ON/OFF, the value of SO_REUSEADDR is irrelevant to the result. Okay, SO_REUSEADDR has an effect on wildcard addresses, good to know. Yet that isn't the only effect it has. There is another well known effect which is also the reason why most people use SO_REUSEADDR in server programs in the first place. For the other important use of this option we have to take a deeper look on how the TCP protocol works. A socket has a send buffer and if a call to the send() function succeeds, it does not mean that the requested data has actually really been sent out, it only means the data has been added to the send buffer. For UDP sockets, the data is usually sent pretty soon, if not immediately, but for TCP sockets, there can be a relatively long delay between adding data to the send buffer and having the TCP implementation really send that data. As a result, when you close a TCP socket, there may still be pending data in the send buffer, which has not been sent yet but your code considers it as sent, since the send() call succeeded. If the TCP implementation was closing the socket immediately on your request, all of this data would be lost and your code wouldn't even know about that. TCP is said to be a reliable protocol and losing data just like that is not very reliable. That's why a socket that still has data to send will go into a state called TIME_WAIT when you close it. In that state it will wait until all pending data has been successfully sent or until a timeout is hit, in which case the socket is closed forcefully. The amount of time the kernel will wait before it closes the socket, regardless if it still has pending send data or not, is called the Linger Time.
The Linger Time is globally configurable on most systems and by default rather long (two minutes is a common value you will find on many systems). It is also configurable per socket using the socket option SO_LINGER which can be used to make the timeout shorter or longer, and even to disable it completely. Disabling it completely is a very bad idea, though, since closing a TCP socket gracefully is a slightly complex process and involves sending forth and back a couple of packets (as well as resending those packets in case they got lost) and this whole close process is also limited by the Linger Time. If you disable lingering, your socket may not only lose pending data, it is also always closed forcefully instead of gracefully, which is usually not recommended. The details about how a TCP connection is closed gracefully are beyond the scope of this answer, if you want to learn more about it, I recommend you have a look at this page. And even if you disabled lingering with SO_LINGER, if your process dies without explicitly closing the socket, BSD (and possibly other systems) will linger nonetheless, ignoring what you have configured. This will happen for example if your code just calls exit() (pretty common for tiny, simple server programs) or the process is killed by a signal (which includes the possibility that it simply crashes because of an illegal memory access). So there is nothing you can do to make sure a socket will never linger under all circumstances. The question is, how does the system treat a socket in state TIME_WAIT? If SO_REUSEADDR is not set, a socket in state TIME_WAIT is considered to still be bound to the source address and port and any attempt to bind a new socket to the same address and port will fail until the socket has really been closed, which may take as long as the configured Linger Time. So don't expect that you can rebind the source address of a socket immediately after closing it. In most cases this will fail.
However, if SO_REUSEADDR is set for the

http://www.freesoft.org/CIE/Course/Section4/11.htm

socket you are trying to bind, another socket bound to the same address and port in state TIME_WAIT is simply ignored, after all it's already "half dead", and your socket can bind to exactly the same address without any problem. In that case it plays no role that the other socket may have exactly the same address and port. Note that binding a socket to exactly the same address and port as a dying socket in TIME_WAIT state can have unexpected, and usually undesired, side effects in case the other socket is still "at work", but that is beyond the scope of this answer and fortunately those side effects are rather rare in practice. There is one final thing you should know about SO_REUSEADDR. Everything written above will work as long as the socket you want to bind to has address reuse enabled. It is not necessary that the other socket, the one which is already bound or is in a TIME_WAIT state, also had this flag set when it was bound. The code that decides if the bind will succeed or fail only inspects the SO_REUSEADDR flag of the socket fed into the bind() call, for all other sockets inspected, this flag is not even looked at.

SO_REUSEPORT

SO_REUSEPORT is what most people would expect SO_REUSEADDR to be. Basically, SO_REUSEPORT allows you to bind an arbitrary number of sockets to exactly the same source address and port as long as all prior bound sockets also had SO_REUSEPORT set before they were bound. If the first socket that is bound to an address and port does not have SO_REUSEPORT set, no other socket can be bound to exactly the same address and port, regardless if this other socket has SO_REUSEPORT set or not, until the first socket releases its binding again. Unlike in case of SO_REUSEADDR the code handling SO_REUSEPORT will not only verify that the currently bound socket has SO_REUSEPORT set but it will also verify that the socket with a conflicting address and port had SO_REUSEPORT set when it was bound. SO_REUSEPORT does not imply SO_REUSEADDR. This means if a socket did not have SO_REUSEPORT set when it was bound and another socket has SO_REUSEPORT set when it is bound to exactly the same address and port, the bind fails, which is expected, but it also fails if the other socket is already dying and is in TIME_WAIT state. To be able to bind a socket to the same addresses and port as another socket in TIME_WAIT state requires either SO_REUSEADDR to be set on that socket or SO_REUSEPORT must have been set on both sockets prior to binding them. Of course it is allowed to set both, SO_REUSEPORT and SO_REUSEADDR, on a socket. There is not much more to say about SO_REUSEPORT other than that it was added later than SO_REUSEADDR, that's why you will not find it in many socket implementations of other systems, which "forked" the BSD code before this option was added, and that there was no way to bind two sockets to exactly the same socket address in BSD prior to this option.

Connect() Returning EADDRINUSE?

Most people know that bind() may fail with the error EADDRINUSE, however, when you start playing around with address reuse, you may run into the strange situation that connect() fails with that error as well. How can this be? How can a remote address, after all that's what connect adds to a socket, be already in use? Connecting multiple sockets to exactly the same remote address has never been a problem before, so what's going wrong here? As I said at the very top of my reply, a connection is defined by a tuple of five values, remember? And I also said, that these five values must be unique otherwise the system cannot distinguish two connections any longer, right? Well, with address reuse, you can bind two sockets of the same protocol to the same source address and port. That means three of those five values are already the same for these two sockets. If you now try to connect both of these sockets also to the same destination address and port, you would create two connected sockets, whose tuples are absolutely identical. This cannot work, at least not for TCP connections (UDP connections are no real connections anyway). If data arrived for either one of the two connections, the system could not tell which connection the data belongs to. At least the destination address or destination port must be different for either connection, so that the system has no problem to identify to which connection incoming data belongs to.

So if you bind two sockets of the same protocol to the same source address and port and try to connect them both to the same destination address and port, connect() will actually fail with the

error EADDRINUSE for the second socket you try to connect, which means that a socket with an identical tuple of five values is already connected.

Multicast Addresses

Most people ignore the fact that multicast addresses exist, but they do exist. While unicast addresses are used for one-to-one communication, multicast addresses are used for one-to-many communication. Most people got aware of multicast addresses when they learned about IPv6 but multicast addresses also existed in IPv4, even though this feature was never widely used on the public Internet.

The meaning of SO_REUSEADDR changes for multicast addresses as it allows multiple sockets to be bound to exactly the same combination of source multicast address and port. In other words, for multicast addresses SO_REUSEADDR behaves exactly as SO_REUSEPORT for unicast addresses. Actually the code treats SO_REUSEADDR and SO_REUSEPORT identically for multicast addresses, that means you could say that SO_REUSEADDR implies SO_REUSEPORT for all multicast addresses and the other way round.

FreeBSD/OpenBSD/NetBSD

All these are rather late forks of the original BSD code, that's why they all three offer the same options as BSD and they also behave the same way as in BSD.

MacOS X

At its very core, MacOS X is simply a BSD-style UNIX, based on a rather late fork of the BSD code, which was even synchronized with FreeBSD 5 for the Mac OS 10.3 release. That's why MacOS X offers the same options as BSD and they also behave the same way as in BSD.

iOS

iOS is just modified MacOS X at its core, so everything that applies to MacOS X also applies to iOS.

Linux

Prior to Linux 3.9, only the option SO_REUSEADDR existed. This option behaves generally the same as in BSD with two important exceptions. One exception is that if a listening (server) TCP socket is already bound to a wildcard IP address and a specific port, no other TCP socket can be bound to the same port, regardless whether either one or both sockets have this flag set. Not even if it would use a more specific address (as is allowed in case of BSD). This restriction does not apply to non-listening (client) TCP sockets and it is also possible to first bind a listening TCP socket to a specific IP address and port combination and later on bind another one to a wildcard IP address and the same port. The second exception is that for UDP sockets this option behaves exactly like SO_REUSEPORT in BSD, so two UDP sockets can be bound to exactly the same address and port combination as long as both had this flag set before they were bound. Linux 3.9 added the option SO_REUSEPORT to Linux as well. This option allows two (or more) sockets, TCP or UDP, listening (server) or non-listening (client), to be bound to exactly the same address and port combination as long as all sockets (including the very first one) had this flag set prior to binding them. To prevent "port hijacking", there is one special limitation, though: All sockets that want to share the same address and port combination must belong to processes that share the same effective user ID! So one user cannot "steal" ports of another user. Additionally the kernel performs some "special magic" for SO_REUSEPORT sockets that isn't found in any other operating system so far: For UDP sockets, it tries to

distribute datagrams evenly, for TCP listening sockets, it tries to distribute incoming connect requests (those accepted by calling accept()) evenly across all the sockets that share the same address and port combination. That means while it is more or less random which socket receives a datagram or connect request in other operating systems that allow full address reuse, Linux tries to optimize distribution so that, for example, multiple instances of a simple server process can easily use SO_REUSEPORT sockets to achieve a kind of simple load balancing and that absolutely for free as the kernel is doing "all the hard work" for them.

Android

Even though the whole Android system is somewhat different from most Linux distributions, at its core works a slightly modified Linux kernel, thus everything that applies to Linux applies to Android as well.

Windows

Windows only knows the SO_REUSEADDR option, there is no SO_REUSEPORT. Setting SO_REUSEADDR on a socket in Windows behaves like setting SO_REUSEPORT and SO_REUSEADDR on a socket in BSD, with one exception: A socket with SO_REUSEADDR can always bind to exactly the same source address and port as an already bound socket, even if the other socket did not have this option set when it was bound. This behavior is somewhat dangerous because it allows an application "to steal" the connected port of another application. Needless to say, this can have major security implications. Microsoft realized that this might be a problem and thus added another socket option SO_EXCLUSIVEADDRUSE. Setting SO_EXCLUSIVEADDRUSE on a socket makes sure that if the binding succeeds, the combination of source address and port is owned exclusively by this socket and no other socket can bind to them, not even if it has SO_REUSEADDR set.

Solaris

Solaris is the successor of SunOS. SunOS was originally based on a fork of BSD, SunOS 5 and later was based on a fork of SVR4, however SVR4 is a merge of BSD, System V, and Xenix, so up to some degree Solaris is also a BSD fork, and a rather early one. As a result Solaris only knows SO_REUSEADDR, there is no SO_REUSEPORT. The SO_REUSEADDR behaves pretty much the same as it does in BSD. As far as I know there is no way to get the same behavior as SO_REUSEPORT in Solaris, that means it is not possible to bind two sockets to exactly the same address and port. Similar to Windows, Solaris has an option to give a socket an exclusive binding. This option is named SO_EXCLBIND. If this option is set on a socket prior to binding it, setting SO_REUSEADDR on another socket has no effect if the two sockets are tested for an address conflict. E.g. if socketA is bound to a wildcard address and socketB has SO_REUSEADDR enabled and is bound to a non-wildcard address and the same port as socketA, this bind will normally succeed, unless socketA had SO_EXCLBIND enabled, in which case it will fail regardless the SO_REUSEADDR flag of socketB.

Packet Sniffer Code in C Using Linux Sockets

Documents

Transcript of Packet Sniffer Code in C Using Linux Sockets