code-completion / archive-misc /concatenator.py
jblitzar's picture
Upload folder using huggingface_hub
a8639ac verified
import os
import glob
from tqdm import tqdm
# Concatenate every top-level ``*.py`` file found directly inside ``folder``
# into a single corpus file, writing a ``# <FILESEP>`` marker line before each
# file's contents so the individual files can be told apart later.
folder = os.path.expanduser("~/torch_datasets/github-python/mega_licensed_all_files")
output_file = os.path.expanduser(
    "~/torch_datasets/github-python/mega_licensed_corpus/concatenated.py"
)

# open(..., "w") does not create missing parent directories, so make sure the
# corpus directory exists before opening the output file.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

# glob.glob returns matches in arbitrary (filesystem-dependent) order; sort so
# that repeated runs over the same inputs produce the same corpus.
source_files = sorted(glob.glob(os.path.join(folder, "*.py")))

with open(output_file, "w", encoding="utf-8") as out_f:
    for file in tqdm(source_files):
        # The separator is emitted before the read attempt, so an unreadable
        # file still leaves a separator followed by the skip note below.
        out_f.write("\n# <FILESEP>\n")
        try:
            # errors="ignore" drops undecodable bytes instead of raising, so
            # only OS-level failures (permissions, broken symlinks, a
            # directory named *.py, ...) are expected here.
            with open(file, "r", encoding="utf-8", errors="ignore") as in_f:
                content = in_f.read()
        except OSError as e:
            # Best effort: record the failure in the corpus and keep going.
            out_f.write(f"\n# Skipping {file} due to error: {e}\n")
        else:
            # Written outside the try so that an output-side failure (e.g. a
            # full disk) is not misreported as a skipped input file.
            out_f.write(content)

print(f"Concatenation complete: {output_file}")