I'm working on a very long project where I read from a file saved on my campus network. When reading the file, everything works perfectly if I delete the whitespace-only lines at the bottom of the list, but when I leave them in (as the professor wants) I get the error "invalid literal for int() with base 10: 'Date'". I've tried a few different options to ignore the blank lines, but none have worked. Here is a list of what I've tried:

 # Raw string: in a normal literal the "\U" of "C:\Users" starts a unicode
 # escape and the file would not even compile.
 with open(r"C:\Users\BraydOneDrive\Documents\2015HomicideLog_FINAL.txt") as f_in:
     # Strip trailing whitespace, then keep only the lines that still have content.
     lines = [text for text in (line.rstrip() for line in f_in) if text]

# Report every line that is empty or contains only whitespace.
for line in file:
    if not line.strip():
        print("it is empty line")

with open("fname.txt") as file:
    for line in file:
        # strip() leaves an empty (falsy) string for blank/whitespace-only lines.
        if not line.strip():
            print("it is empty line")

# NOTE(review): `file` must be the path of the log file here — confirm.
with open(file) as f_in:
    # Strip each line and keep only the non-empty results.
    lines = [line for line in (l.strip() for l in f_in) if line]

None of these have worked. Here is the code I use when the white space has been deleted from the file, and it works perfectly:

# Raw string so the backslashes of the Windows path are kept literally
# (a plain "C:\Users" fails to compile because "\U" starts a unicode escape).
file = open(r"C:\Users\BraydOneDrive\Documents\2015HomicideLog_FINAL.txt", "r")

I have been working on this and trying to find a workaround for about 12 hours, and I haven't gotten anywhere. Any ideas?


Date   Event #  TIME    Victim Name     V R/G   V Age
150101 0685 2:03    Anderson, Kedral    BM  26
150103 0816 5:57    Shines, Kathryn     WF  54
150106 4417 22:06   Norton, Noella      HF  46
150107 4655 23:27   Speidel, Steven     WM  41
150110 1100 8:35    Orozco, Jose        HM  53
*blank spaces here*
     *blank spaces here*


def dayofmurder(date): #function to find day of the murder
    """Return the weekday name for a date given as a YYMMDD integer.

    Args:
        date: Integer such as 150103 (year 15, month 01, day 03).

    Returns:
        The weekday name, e.g. "Saturday".

    The month table has February at 28 days and January 1st is taken to be a
    Thursday, so the result is only valid for the non-leap year of the log.
    """
    month_and_day = date % 10000  # drop the year digits, leaving MMDD
    month = month_and_day // 100
    day = month_and_day % 100
    # Number of days that have elapsed before the first day of each month.
    monthlist = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
    daysofweek = ["Sunday", "Monday", "Tuesday", "Wednesday",
                  "Thursday", "Friday", "Saturday"]
    startonday = 4  # index of Thursday: the weekday of January 1st in the log
    offset = (monthlist[month - 1] + (day - 1) + startonday) % 7
    return daysofweek[offset]

daysoftheweek = ["Sunday", "Monday", "Tuesday", "Wednesday",
                 "Thursday", "Friday", "Saturday"]  # weekday names in printing order

# Raw string so the backslashes of the Windows path are kept literally
# (a plain "C:\Users" fails to compile because "\U" starts a unicode escape).
with open(r"C:\Users\BraydOneDrive\Documents\2015HomicideLog_FINAL.txt", "r") as file:
    # Keep only data rows: a row counts when its first field is all digits,
    # which drops the header line and the blank/whitespace-only lines that
    # made int() fail.
    lines = []
    for row in file:
        fields = row.split()
        if fields and fields[0].isdigit():
            lines.append(row)

print("Days Homicides Happened on:")
dayOfmurders = dict.fromkeys(daysoftheweek, 0)  # weekday name -> number of homicides
for line in lines:
    value = line.split()
    listdays = dayofmurder(int(value[0]))  # first field is the YYMMDD date
    dayOfmurders[listdays] += 1

for v in daysoftheweek:  # print in Sunday..Saturday order
    print(dayOfmurders[v], "homicides happen on a", v)
print("\n", "Number of Homicides in hour block:")
# Homicides per hour of the day. The keys are the first two characters of the
# TIME field, so single-digit hours keep their colon ("2:03" -> "2:") while
# two-digit hours do not ("22:06" -> "22").
time = {(f"{hour}:" if hour < 10 else str(hour)): 0 for hour in range(24)}
for line in lines:
    value = line.split()
    listdays = value[2][0:2]  # third field is the time; keep its first two characters
    time[listdays] += 1

for k, v in time.items():
    print(v, "Homicides happened in", k, "hour block")
# NOTE(review): this heading was truncated in the source; wording reconstructed.
print("\n", "Races and Occurances of Homicides:")
# Homicides per race/gender code.
races = {"HF": 0, "HM": 0, "WF": 0, "WM": 0, "AF": 0, "BM": 0, "BF": 0, "AM": 0}
for line in lines:
    value = line.split()
    # The race/gender code is the second-to-last field no matter how many
    # words the victim's name has, so no per-name special cases are needed.
    listdays = value[-2]
    races[listdays] += 1

for k, v in races.items():
    print(k, "=", v)

# Homicides per age decade: key "0" is ages 0-9, "1" is 10-19, ... "9" is 90+.
ages = {str(decade): 0 for decade in range(10)}

for line in lines:
    value = line.split()
    # The age is always the last field, however many words the name has.
    # Integer-divide by 10 to get the decade (taking the first digit would put
    # a 5-year-old in the 50s); cap at 9 so every age of 90+ shares a bucket.
    listdays = str(min(int(value[-1]) // 10, 9))
    ages[listdays] += 1

for k, v in ages.items():
    print(k, "=", v)

print("\n", "Here are the Graphs from data found above")

import pylab #importing pylab for graphs

# NOTE: per the original comments, every count below was copied by hand from
# the tallies printed earlier in the script.

# Bar chart: homicides per day of the week.
bar_width = .75
x_values = [1, 2, 3, 4, 5, 6, 7]
y_values = [13, 25, 17, 26, 20, 14, 19]
tlabel = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]
pylab.title("Homicide Occurenece by Day of Week Per Homicides File")
pylab.bar(x_values, y_values, width=bar_width, tick_label=tlabel,
          align="center", color="b")
pylab.show()  # show this chart on its own before drawing the next one

# Pie chart: race/gender breakdown.
pylab.axes(aspect=1)  # equal aspect ratio
values = [39, 11, 31, 6, 1, 2, 29, 15]
pie_labels = ["BM", "BF", "HM", "HF", "AM", "AF", "WM", "WF"]
color_list = ["purple", "green", "blue", "cyan", "yellow", "maroon", "red",
              "white"]
pylab.pie(values, autopct="%1.f%%", labels=pie_labels, colors=color_list)
pylab.title("Pie Chart Showing Racial and Gender Breakdown in Homicides File")
pylab.show()

# Bar chart: homicides per age decade (bucket 0 is 0-9, bucket 1 is 10-19, ...).
bar_width = .5
x_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
y_values = [4, 7, 27, 41, 4, 15, 7, 6, 2, 5]
tlabel = ["0-9", "10-19", "20-29", "30-39", "40-49", "50-59",
          "60-69", "70-79", "80-89", "90+"]
pylab.title("Homicides per Age Categories in Homocide File")
pylab.bar(x_values, y_values, width=bar_width, tick_label=tlabel,
          align="center", color="b")
pylab.show()

# Bar chart: homicides per hour of the clock.
bar_width = .3
x_values = list(range(24))
y_values = [3, 3, 7, 1, 4, 6, 4, 4, 4, 5, 5, 3, 8, 4, 6, 2, 5, 13, 10, 6, 7, 5, 13, 6]
tlabel = [str(hour) for hour in range(24)]
pylab.title("Homicides Per Hour of the Clock in Homicide File")
pylab.bar(x_values, y_values, width=bar_width, tick_label=tlabel,
          align="center", color="b")
pylab.show()


# Collect every non-blank line of the file, stripped of surrounding whitespace.
lines = []
with open("fname.txt") as f:
    for line in f:
        line = line.strip()
        if line:  # an empty string means the line was blank or whitespace only
            lines.append(line)

# do something with lines         

print("Days Homicides Happened on:")

# Same result as the explicit loop: stripped lines, blank ones dropped.
with open("fname.txt") as f:
    lines = [text for text in map(str.strip, f) if text]

# do something with lines         

print("Days Homicides Happened on:")


# Raw string so "\U" in the Windows path is not read as a unicode escape;
# the with-block closes the file once its lines have been read.
with open(r"C:\Users\BraydOneDrive\Documents\2015HomicideLog_FINAL.txt", "r") as file:
    lines = file.readlines()[1:]  # drop the header row

print("Days Homicides Happened on:") 

for line in lines:
    # check if line is not empty
    if line.strip():
        # do something with not-empty line
        for number in line.split():
            # NOTE(review): the loop body was missing in the source; printing
            # each field is a placeholder for the real processing.
            print(number)


def dayofmurder(date): #function to find day of the murder
    """Return the weekday name for a date given as a YYMMDD integer.

    Args:
        date: Integer such as 150103 (year 15, month 01, day 03).

    Returns:
        The weekday name, e.g. "Saturday".

    The month table has February at 28 days and January 1st is taken to be a
    Thursday, so the result is only valid for the non-leap year of the log.
    """
    month_and_day = date % 10000  # drop the year digits, leaving MMDD
    month = month_and_day // 100
    day = month_and_day % 100
    # Number of days that have elapsed before the first day of each month.
    monthlist = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
    daysofweek = ["Sunday", "Monday", "Tuesday", "Wednesday",
                  "Thursday", "Friday", "Saturday"]
    startonday = 4  # index of Thursday: the weekday of January 1st in the log
    offset = (monthlist[month - 1] + (day - 1) + startonday) % 7
    return daysofweek[offset]

daysoftheweek = ["Sunday", "Monday", "Tuesday", "Wednesday",
                 "Thursday", "Friday", "Saturday"]  # weekday names in printing order


# Read the file, strip every line and drop the blank/whitespace-only ones.
with open("fname.txt") as f:
    lines = [text for text in map(str.strip, f) if text]

# skip headers
lines = lines[1:]


print("Days Homicides Happened on:")
dayOfmurders = dict.fromkeys(daysoftheweek, 0)  # weekday name -> number of homicides
for line in lines:
    value = line.split()
    listdays = dayofmurder(int(value[0]))  # first field is the YYMMDD date
    dayOfmurders[listdays] += 1

for v in daysoftheweek:  # print in Sunday..Saturday order
    print(dayOfmurders[v], "homicides happen on a", v)
print("\n", "Number of Homicides in hour block:")
# Homicides per hour of the day. The keys are the first two characters of the
# TIME field, so single-digit hours keep their colon ("2:03" -> "2:") while
# two-digit hours do not ("22:06" -> "22").
time = {(f"{hour}:" if hour < 10 else str(hour)): 0 for hour in range(24)}
for line in lines:
    value = line.split()
    listdays = value[2][0:2]  # third field is the time; keep its first two characters
    time[listdays] += 1

for k, v in time.items():
    print(v, "Homicides happened in", k, "hour block")
# NOTE(review): this heading was truncated in the source; wording reconstructed.
print("\n", "Races and Occurances of Homicides:")
# Homicides per race/gender code.
races = {"HF": 0, "HM": 0, "WF": 0, "WM": 0, "AF": 0, "BM": 0, "BF": 0, "AM": 0}
for line in lines:
    value = line.split()
    # The race/gender code is the second-to-last field no matter how many
    # words the victim's name has, so no per-name special cases are needed.
    listdays = value[-2]
    races[listdays] += 1

for k, v in races.items():
    print(k, "=", v)

# Homicides per age decade: key "0" is ages 0-9, "1" is 10-19, ... "9" is 90+.
ages = {str(decade): 0 for decade in range(10)}

for line in lines:
    value = line.split()
    # The age is always the last field, however many words the name has.
    # Integer-divide by 10 to get the decade (taking the first digit would put
    # a 5-year-old in the 50s); cap at 9 so every age of 90+ shares a bucket.
    listdays = str(min(int(value[-1]) // 10, 9))
    ages[listdays] += 1

for k, v in ages.items():
    print(k, "=", v)

print("\n", "Here are the Graphs from data found above")

import pylab #importing pylab for graphs

# NOTE: per the original comments, every count below was copied by hand from
# the tallies printed earlier in the script.

# Bar chart: homicides per day of the week.
bar_width = .75
x_values = [1, 2, 3, 4, 5, 6, 7]
y_values = [13, 25, 17, 26, 20, 14, 19]
tlabel = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]
pylab.title("Homicide Occurenece by Day of Week Per Homicides File")
pylab.bar(x_values, y_values, width=bar_width, tick_label=tlabel,
          align="center", color="b")
pylab.show()  # show this chart on its own before drawing the next one

# Pie chart: race/gender breakdown.
pylab.axes(aspect=1)  # equal aspect ratio
values = [39, 11, 31, 6, 1, 2, 29, 15]
pie_labels = ["BM", "BF", "HM", "HF", "AM", "AF", "WM", "WF"]
color_list = ["purple", "green", "blue", "cyan", "yellow", "maroon", "red",
              "white"]
pylab.pie(values, autopct="%1.f%%", labels=pie_labels, colors=color_list)
pylab.title("Pie Chart Showing Racial and Gender Breakdown in Homicides File")
pylab.show()

# Bar chart: homicides per age decade (bucket 0 is 0-9, bucket 1 is 10-19, ...).
bar_width = .5
x_values = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
y_values = [4, 7, 27, 41, 4, 15, 7, 6, 2, 5]
tlabel = ["0-9", "10-19", "20-29", "30-39", "40-49", "50-59",
          "60-69", "70-79", "80-89", "90+"]
pylab.title("Homicides per Age Categories in Homocide File")
pylab.bar(x_values, y_values, width=bar_width, tick_label=tlabel,
          align="center", color="b")
pylab.show()

# Bar chart: homicides per hour of the clock.
bar_width = .3
x_values = list(range(24))
y_values = [3, 3, 7, 1, 4, 6, 4, 4, 4, 5, 5, 3, 8, 4, 6, 2, 5, 13, 10, 6, 7, 5, 13, 6]
tlabel = [str(hour) for hour in range(24)]
pylab.title("Homicides Per Hour of the Clock in Homicide File")
pylab.bar(x_values, y_values, width=bar_width, tick_label=tlabel,
          align="center", color="b")
pylab.show()


Case where it truly is just whitespace

with open("file.txt") as f:
    data = f.readlines()
    # The header splits on whitespace into ten words; re-join the words that
    # belong to the same column so there is one entry per column.
    headersRaw = data[0].split()
    headersFinal = [headersRaw[0],                   # Date
                    headersRaw[1]+" "+headersRaw[2], # Event #
                    headersRaw[3],                   # TIME
                    headersRaw[4]+" "+headersRaw[5], # Victim Name
                    headersRaw[6]+" "+headersRaw[7], # V R/G
                    headersRaw[8]+" "+headersRaw[9]] # V Age
    i = 1
    computedData = []
    while i < len(data):
        rawData = data[i].split()
        # Stop at the first line that is blank (split() gives an empty list)
        # or whose first field is not a number, i.e. at the end of the data.
        if not rawData or not rawData[0].isdigit():
            break
        computedData.append([rawData[0],                # Date
                             rawData[1],                # Event #
                             rawData[2],                # Time
                             " ".join(rawData[3:-2]),   # Victim name (any number of words)
                             rawData[-2],               # V R/G
                             rawData[-1]])              # V Age
        i += 1


Case where next line might be something different but not whitespace

with open("file.txt") as f:
    data = f.readlines()
    # The header splits on whitespace into ten words; re-join the words that
    # belong to the same column so there is one entry per column.
    headersRaw = data[0].split()
    headersFinal = [headersRaw[0],                   # Date
                    headersRaw[1]+" "+headersRaw[2], # Event #
                    headersRaw[3],                   # TIME
                    headersRaw[4]+" "+headersRaw[5], # Victim Name
                    headersRaw[6]+" "+headersRaw[7], # V R/G
                    headersRaw[8]+" "+headersRaw[9]] # V Age
    i = 1
    computedData = []
    while i < len(data):
        rawData = data[i].split()
        # Stop at the first line that is blank (split() gives an empty list)
        # or whose first field is not six characters long like a date.
        if not rawData or len(rawData[0]) != 6:
            break
        computedData.append([rawData[0],                # Date
                             rawData[1],                # Event #
                             rawData[2],                # Time
                             " ".join(rawData[3:-2]),   # Victim name (any number of words)
                             rawData[-2],               # V R/G
                             rawData[-1]])              # V Age
        i += 1




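This might not be the best solution, but without knowing more about what can follow the data it is hard to write a perfect check. If you can, I would suggest adding something like a sentinel (dummy) line after the data, or some other marker that you can easily check for.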

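Use a regular expression with named groups. The one below captures every data field of your file by name rather than by an arbitrary index. The regular expression corresponds 1:1 to a data line; if a line does not match, it is ignored. This means you do not have to skip the header line or strip out the blank lines. An example follows.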

import re
# One named group per column. The name is matched lazily so it may contain any
# number of words; a line that does not fit (header, blank line) simply does
# not match and is skipped.
expr    = re.compile(r"^(?P<date>\d+)\s+(?P<event>\d+)\s+(?P<time>\d{1,2}:\d{2})\s+(?P<name>\S.*?)\s+(?P<RG>[a-z]{2})\s+(?P<age>\d{1,3})\s*$", re.I)

with open("fname.txt") as file:
    for line in file:
        if m := expr.match(line):
            entry = {
                "date":  int(m.group("date")),
                "event": int(m.group("event")),
                "time":  m.group("time"),
                "name":  m.group("name"),
                "rg":    m.group("RG"),
                "age":   int(m.group("age")),
            }
            # NOTE(review): the source showed the dict commented out and no
            # live statement; printing is a placeholder for real processing.
            print(entry)

Below is an example of how the above could be used for your needs. It also demonstrates a few other things.

import re, os, pylab
from typing import Iterable

# Days elapsed before the first day of each month (February counted as 28 days).
MSTART = (0,31,59,90,120,151,181,212,243,273,304,334)
# Pie-slice colours, one per race/gender code.
COLORS = ("purple", "green", "blue", "cyan", "yellow", "maroon", "red", "white")

# Label sets: each tuple holds the keys of one statistics dictionary.
DOW    = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
HOURS  = tuple(map(str, range(24)))
AGES   = tuple(f"{i*10}-{(i+1)*10-1}" for i in range(12))  # "0-9" ... "110-119"
RG     = ("AF", "AM", "BF", "BM", "HF", "HM", "WF", "WM")

# Chart titles and their label sets, in matching order.
TITLES = ("Day Of Week", "Hour", "Age", "Race / Gender")
LABELS = (DOW, HOURS, AGES, RG)

def day(date, startonday:int=4) -> str:
    """Return the DOW label for a date string laid out as YYMMDD.

    Characters 2-3 of `date` are read as the month and everything from
    character 4 on as the day of the month. `startonday` is the DOW index
    of January 1st.
    """
    month_index = int(date[2:4]) - 1
    day_offset = int(date[4:]) + startonday - 1
    return DOW[(MSTART[month_index] + day_offset) % 7]
def prune(d:dict):
    """Split `d` into parallel key and value lists, dropping falsy values.

    Returns:
        A `(keys, values)` pair of lists, in the dictionary's iteration
        order, holding only the entries whose value is truthy (for counters
        that means non-zero).
    """
    ks, vs = [], []
    for k, v in d.items():
        if v:
            ks.append(k)
            vs.append(v)
    return ks, vs
def plotter(title:str, labels:list, values:list, color:Iterable="b"):
    """Draw and show a bar chart with one bar per label.

    Args:
        title: Text placed above the chart.
        labels: Tick labels, one per bar.
        values: Bar heights, parallel to `labels`.
        color: Bar colour(s) passed through to `pylab.bar`.
    """
    pylab.title(title)
    pylab.bar(range(len(labels)), values, width=.5, tick_label=labels, align="center", color=color)
    pylab.show()  # display now so the next chart starts on a fresh figure
def pie(title:str, labels:list, values:list, color:Iterable=COLORS):
    """Draw and show a pie chart with whole-number percentage labels.

    Args:
        title: Text placed above the chart.
        labels: Slice labels.
        values: Slice sizes, parallel to `labels`.
        color: Slice colours passed through to `pylab.pie`.
    """
    pylab.title(title)
    pylab.pie(values, autopct="%1.f%%", labels=labels, colors=color)
    pylab.show()  # display now so the next chart starts on a fresh figure

# One zero-initialised counter dict per label set, in LABELS order.
stats = tuple({k:0 for k in targ} for targ in LABELS)

#match line
# One named group per column. The name is matched lazily so it may contain any
# number of words; header and blank lines do not match and are skipped.
expr  = re.compile(r"^(?P<date>\d+)\s+(?P<event>\d+)\s+(?P<time>\d{1,2}:\d{2})\s+(?P<name>\S.*?)\s+(?P<RG>[a-z]{2})\s+(?P<age>\d{1,3})\s*$", re.I|re.M)

#update stats
with open("fname.txt") as file:
    for line in file:
        if m := expr.match(line):
            d = day(m.group("date"))                       #day of week
            h = str(int(m.group("time").split(":")[0]))    #hour, without any leading zero
            r = m.group("RG").upper()                      #race / gender (match is case-insensitive)
            #reformat age to key: integer-divide by 10 for the decade, e.g. 26 -> "20-29"
            a = int(m.group("age")) // 10
            a = f"{a*10}-{(a+1)*10-1}"
            # (d, h, a, r) is in the same order as the dicts in stats.
            for targ, i in zip(stats, (d, h, a, r)):
                targ[i] += 1

for t,l,s in zip(TITLES, LABELS, stats):
    t = f"Homicides By {t} In Homicide File"

    if l in (RG, ):
        # list() because a dict view is not a sequence pylab can plot directly.
        pie(t, l, list(s.values()))
    plotter(t, *prune(s))

