Reading a .txt file in C: how to handle empty fields when parsing
I am trying to parse data from a .txt file whose format is not easy to handle. The separator is a space character. The file contains one field of variable length: the fifth column from the left. Therefore I parse the first four columns from the left, then parse the remaining columns from the right until I reach the variable-length field. This works. My main problem is that some fields are sometimes empty (see the third row, column 3), and because of that my code cannot parse the data accurately: in the output file, the parsing is not correct for every line. Is there a way to skip empty fields so that sscanf can recognize them? I would be grateful for a tip on how to parse the data correctly. Code at onlinegdb: https://onlinegdb.com/8rOBlIfMhU
#include <stdio.h>
#include <string.h>
#define BUF 1500
// Reverse a NUL-terminated string in place and hand the same pointer back.
// A NULL pointer or an empty string is returned untouched.
char *strrev(char *str)
{
    char *left;
    char *right;

    if (str == NULL || *str == '\0') {
        return str;
    }

    // Walk inwards from both ends, exchanging one pair of characters per step.
    left = str;
    right = str + strlen(str) - 1;
    while (left < right) {
        char held = *left;
        *left++ = *right;
        *right-- = held;
    }
    return str;
}
/* Column layout of one record: a fixed number of columns on the left, then
 * the variable-length company name (which may itself contain spaces), then a
 * fixed number of trailing columns. */
enum {
    LEFT_COLS = 4,   /* PERMNO, date, CUSIP, NCUSIP */
    RIGHT_COLS = 56  /* columns after the name; matches the 61-column output */
};

/* Characters that separate columns (the same set sscanf's %s skips). */
static const char SEPARATORS[] = " \t\n\v\f\r";

/* Returns non-zero when ch is one of the column separators. */
static int is_separator(char ch)
{
    /* strchr would also match the terminating NUL, so exclude it first. */
    return ch != '\0' && strchr(SEPARATORS, ch) != NULL;
}

/*
 * Splits one record in place.
 *
 * line   NUL-terminated record; it is modified (separators that end a column
 *        are overwritten with NUL).
 * left   receives LEFT_COLS pointers to the leading columns.
 * name   receives a pointer to the variable-length middle field.
 * right  receives RIGHT_COLS pointers to the trailing columns, in file order.
 *
 * All returned pointers point into `line`.
 * Returns 1 on success, 0 when the line has fewer than
 * LEFT_COLS + 1 + RIGHT_COLS whitespace-separated tokens.
 *
 * Limitation: columns are recognised purely by whitespace, so an empty
 * column leaves no trace in the line.  A record with an empty column but a
 * multi-word name still has enough tokens and is split at the wrong place;
 * that cannot be detected without knowing the real column positions.
 */
static int split_record(char *line, char *left[], char **name, char *right[])
{
    char *cur = line;
    char *end;
    int col;

    /* Leading columns: walk forward, one token at a time. */
    for (col = 0; col < LEFT_COLS; ++col) {
        cur += strspn(cur, SEPARATORS);
        if (*cur == '\0') {
            return 0;
        }
        left[col] = cur;
        cur += strcspn(cur, SEPARATORS);
        if (*cur != '\0') {
            *cur++ = '\0';
        }
    }

    /* Trailing columns: walk backwards from the end of the line, never
     * crossing `cur` (the first character after the leading columns). */
    end = cur + strlen(cur);
    for (col = RIGHT_COLS - 1; col >= 0; --col) {
        while (end > cur && is_separator(end[-1])) {
            --end;
        }
        if (end == cur) {
            return 0;
        }
        *end = '\0'; /* terminates the token that stops here */
        while (end > cur && !is_separator(end[-1])) {
            --end;
        }
        right[col] = end;
    }

    /* Whatever is left in the middle is the name; trim both sides. */
    while (end > cur && is_separator(end[-1])) {
        --end;
    }
    if (end == cur) {
        return 0; /* nothing left for the name */
    }
    *end = '\0';
    cur += strspn(cur, SEPARATORS);
    *name = cur;
    return 1;
}

/*
 * Writes a sample data file, reads it back line by line, splits every record
 * into LEFT_COLS leading columns, the company name and RIGHT_COLS trailing
 * columns, echoes the first five columns to stdout and writes all columns,
 * separated by "; ", to output.txt.
 *
 * Returns 0 on success and 1 when a file cannot be opened or written.
 * Lines that do not contain enough columns are reported on stderr and
 * skipped instead of being written with stale or shifted data.
 */
int main(void)
{
    static const char sample_path[] = "/tmp/abc123";
    char puffer[BUF];
    char *left[LEFT_COLS];
    char *right[RIGHT_COLS];
    char *name;
    unsigned long line_no = 0;
    FILE *sample;
    FILE *in;
    FILE *out;
    int write_failed;
    int col;

    /* Create the sample input. */
    sample = fopen(sample_path, "w");
    if (sample == NULL) {
        printf("file could not be opened\n");
        return 1;
    }
    write_failed = fputs(
    "10000 07/01/1986 68391610 68391610 OPTIMUM MANUFACTURING INC OMFGA 7952 10 10396 3 3 3990 3990 39 399 03/12/1986 OMFGA Q A R -2.56250 1000 . 2.75000 2.37500 . . . C C 3680 2 30/01/1986 . . . . . . . . . . . . 1.00000 1.00 . . . . . 1 1 9 2 0.013809 0.013800 0.011061 0.011046 0.014954\n"
    "12781 30/11/1970 84857L10 50558810 LACLEDE GAS CO LG 21080 11 0 1 1 4925 2741 27 274 . N A R 25.00000 3500 . 25.00000 24.00000 . . . 0.041667 0.041667 4141 . . . . . . . . . . . . . . 4.00000 4.0 . . . . . . . . . 0.016698 0.016439 0.021276 0.020949 0.014779\n"
    "13901 27/05/1955 02209S10 PHILIP MORRIS & CO LTD 21398 11 0 1 1 2110 2111 21 211 . N A R 42.00000 4400 40.87500 42.00000 40.87500 . . . 0.030675 0.030675 2887 . . . . . . . . . . . . . . 2479.29 576.000 . . . . . . . . . 0.001626 0.001543 0.001477 0.001381 .\n"
    "13901 31/05/1955 02209S10 PHILIP MORRIS & CO LTD 21398 11 0 1 1 2110 2111 21 211 . N A R 41.37500 5600 42.12500 42.12500 41.00000 . . . -0.014881 -0.014881 2887 . . . . . . . . . . . . . . 2479.29 576.000 . . . . . . . . . 0.000496 0.000165 -0.000448 -0.000851 .\n"
    "13901 01/06/1955 02209S10 PHILIP MORRIS INC 21398 11 0 1 1 2110 2111 21 211 01/07/1962 N A R 40.00000 11300 40.87500 40.87500 40.00000 . . . -0.033233 -0.033233 2887 2 29/12/1955 . . . . . . . . . . . . 2479.29 576.000 . . . . . . . . . 0.001683 0.001476 -0.000496 -0.000724 .\n"
    "13901 02/06/1955 02209S10 PHILIP MORRIS INC 21398 11 0 1 1 2110 2111 21 211 . N A R 39.87500 9600 40.00000 40.12500 39.87500 . . . -0.003125 -0.003125 2887 . . . . . . . . . . . . . . 2479.29 576.000 . . . . . . . . . 0.003036 0.002973 0.002027 0.001912 .\n"
    "13901 03/06/1955 02209S10 PHILIP MORRIS INC 21398 11 0 1 1 2110 2111 21 211 . N A R 40.12500 5500 40.00000 40.62500 40.00000 . . . 0.006270 0.006270 2887 . . . . . . . . . . . . . . 2479.29 576.000 . . . . . . . . . 0.006440 0.006420 0.004233 0.004141 .\n"
    , sample) == EOF;
    /* fclose flushes, so a failure here also means the sample is incomplete. */
    if (fclose(sample) != 0 || write_failed) {
        fprintf(stderr, "%s: write failed\n", sample_path);
        return 1;
    }

    in = fopen(sample_path, "r");
    if (in == NULL) {
        printf("file could not be opened\n");
        return 1;
    }
    out = fopen("output.txt", "w");
    if (out == NULL) {
        printf("file could not be opened\n");
        fclose(in);
        return 1;
    }

    while (fgets(puffer, BUF, in) != NULL) {
        ++line_no;
        if (!split_record(puffer, left, &name, right)) {
            fprintf(stderr, "%s:%lu: fewer than %d columns, line skipped\n",
                    sample_path, line_no, LEFT_COLS + 1 + RIGHT_COLS);
            continue;
        }

        // print first 5 columns in the console
        printf("%s %s %s %s %s\n", left[0], left[1], left[2], left[3], name);

        // print all parsed columns in output.txt file
        fprintf(out, "%s; %s; %s; %s; %s",
                left[0], left[1], left[2], left[3], name);
        for (col = 0; col < RIGHT_COLS; ++col) {
            fprintf(out, "; %s", right[col]);
        }
        fputc('\n', out);
    }

    fclose(in);
    if (fclose(out) != 0) {
        fprintf(stderr, "output.txt: write failed\n");
        return 1;
    }
    return 0;
}
Comments
Post a Comment