Tosin Akinosho (aider) committed on
Commit
a0fe4f1
1 Parent(s): 58dbd1c

fix: skip all files under .git directory during partitioning to avoid unsupported file type errors

Browse files
Files changed (1) hide show
  1. utils/document_loader.py +3 -3
utils/document_loader.py CHANGED
@@ -36,7 +36,7 @@ def convert_files_to_txt(src_dir, dst_dir):
36
  # Add detailed logging for skipped sentences
37
  def log_skipped_sentences(elements):
38
  for element in elements:
39
- if element.type == "NarrativeText" and len(element.text.split()) < 5:
40
  logging.debug(f"Skipped sentence: {element.text}")
41
  return elements
42
 
@@ -45,8 +45,8 @@ def convert_files_to_txt(src_dir, dst_dir):
45
  for root, dirs, files in os.walk(src_dir):
46
  for file in files:
47
  file_path = os.path.join(root, file)
48
- if file_path.endswith('.git/index'):
49
- logging.debug(f"Skipping .git/index file: {file_path}")
50
  continue
51
  logging.debug(f"Partitioning file: {file_path}")
52
  try:
 
36
  # Add detailed logging for skipped sentences
37
  def log_skipped_sentences(elements):
38
  for element in elements:
39
+ if hasattr(element, 'type') and element.type == "NarrativeText" and len(element.text.split()) < 5:
40
  logging.debug(f"Skipped sentence: {element.text}")
41
  return elements
42
 
 
45
  for root, dirs, files in os.walk(src_dir):
46
  for file in files:
47
  file_path = os.path.join(root, file)
48
+ if '.git' in file_path:
49
+ logging.debug(f"Skipping .git directory file: {file_path}")
50
  continue
51
  logging.debug(f"Partitioning file: {file_path}")
52
  try: