Question

# Webcam object-detection loop: grab a frame, run the detection graph on it,
# draw the detections, and print/draw a rough proximity value for every
# class-1 detection scoring at least 0.5.
cap = cv2.VideoCapture(0)

try:
    with detection_graph.as_default():
        with tf.compat.v1.Session(graph=detection_graph) as sess:
            # The tensor handles are the same for every frame, so look them
            # up once instead of on every loop iteration.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for a detection.
            # The score is shown on the result image with the class label.
            scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
            classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
            fetches = [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor]

            ret = True
            while ret:
                ret, image_np = cap.read()
                if not ret:
                    # No frame was delivered; there is nothing to feed the model.
                    break

                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    fetches,
                    feed_dict={image_tensor: image_np_expanded})

                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                # OCR and the snapshot write run only when 'r' is pressed;
                # doing them on every frame would stall the loop.
                if cv2.waitKey(2) & 0xFF == ord('r'):
                    ret, image_np = cap.read()
                    if not ret:
                        break
                    text = pytesseract.image_to_string(image_np)
                    print(text)
                    cv2.imwrite('assets/img1.jpg', image_np)

                # Box coordinates are normalized, so scale the label position
                # by the real frame size rather than a fixed 800x450.
                frame_height, frame_width = image_np.shape[:2]

                for box, detected_class, score in zip(boxes[0], classes[0], scores[0]):
                    # Only class id 1 at a score of 0.5 or more is considered
                    # (the warning text below refers to it as a person).
                    if not (detected_class == 1 and score >= 0.5):
                        continue

                    # Indices 1/3 are the horizontal extent, 0/2 the vertical.
                    mid_x = (box[1] + box[3]) / 2
                    mid_y = (box[0] + box[2]) / 2
                    # Heuristic: the wider the box, the smaller this value.
                    apx_distance = round(((1 - (box[3] - box[1])) ** 4), 1)
                    cv2.putText(
                        image_np,
                        '{}'.format(apx_distance),
                        (int(mid_x * frame_width), int(mid_y * frame_height)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                    print(apx_distance)
                    # NOTE(review): a blocking notification/text-to-speech
                    # call here would stall frame capture for its whole
                    # duration; if re-enabled, consider handing the message
                    # to a background worker thread via a queue.
                    # telesend(apx_distance)
                    # speaker.Speak(apx_distance)

                    if apx_distance <= 0.5 and 0.3 < mid_x < 0.7:
                        cv2.putText(
                            image_np, 'WARNING!!!', (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 3)
                        print("Warning -Person very close to the frame")
                        # telesend('Warning -A person is close infront of you')
finally:
    # Always hand the camera back, even if detection raised.
    cap.release()

I'm new to object detection and only know the basics of Python, so I copied this code from GitHub and ran it.

I'm getting very low frame rates, and I tried changing the TTS library (gTTS, pyttsx3).

Removing the TTS call from the if statement bumped up my FPS.

So is there any way to optimise my code for a smoother FPS without removing TTS?

友情链接