style: Format all Python files with ruff
- Formatted 103 files to comply with ruff format requirements - No code logic changes, only formatting/whitespace - Fixes CI formatting check failures
This commit is contained in:
@@ -59,27 +59,26 @@ def get_storage_adaptor(provider: str, **kwargs) -> BaseStorageAdaptor:
|
||||
account_name='myaccount')
|
||||
"""
|
||||
adaptors = {
|
||||
's3': S3StorageAdaptor,
|
||||
'gcs': GCSStorageAdaptor,
|
||||
'azure': AzureStorageAdaptor,
|
||||
"s3": S3StorageAdaptor,
|
||||
"gcs": GCSStorageAdaptor,
|
||||
"azure": AzureStorageAdaptor,
|
||||
}
|
||||
|
||||
provider_lower = provider.lower()
|
||||
if provider_lower not in adaptors:
|
||||
supported = ', '.join(adaptors.keys())
|
||||
supported = ", ".join(adaptors.keys())
|
||||
raise ValueError(
|
||||
f"Unsupported storage provider: {provider}. "
|
||||
f"Supported providers: {supported}"
|
||||
f"Unsupported storage provider: {provider}. Supported providers: {supported}"
|
||||
)
|
||||
|
||||
return adaptors[provider_lower](**kwargs)
|
||||
|
||||
|
||||
__all__ = [
|
||||
'BaseStorageAdaptor',
|
||||
'StorageObject',
|
||||
'S3StorageAdaptor',
|
||||
'GCSStorageAdaptor',
|
||||
'AzureStorageAdaptor',
|
||||
'get_storage_adaptor',
|
||||
"BaseStorageAdaptor",
|
||||
"StorageObject",
|
||||
"S3StorageAdaptor",
|
||||
"GCSStorageAdaptor",
|
||||
"AzureStorageAdaptor",
|
||||
"get_storage_adaptor",
|
||||
]
|
||||
|
||||
@@ -9,6 +9,7 @@ from datetime import datetime, timedelta
|
||||
try:
|
||||
from azure.storage.blob import BlobServiceClient, BlobSasPermissions, generate_blob_sas
|
||||
from azure.core.exceptions import ResourceNotFoundError
|
||||
|
||||
AZURE_AVAILABLE = True
|
||||
except ImportError:
|
||||
AZURE_AVAILABLE = False
|
||||
@@ -65,38 +66,30 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
"Install with: pip install azure-storage-blob"
|
||||
)
|
||||
|
||||
if 'container' not in kwargs:
|
||||
if "container" not in kwargs:
|
||||
raise ValueError("container parameter is required for Azure storage")
|
||||
|
||||
self.container_name = kwargs['container']
|
||||
self.container_name = kwargs["container"]
|
||||
|
||||
# Initialize BlobServiceClient
|
||||
if 'connection_string' in kwargs:
|
||||
connection_string = kwargs['connection_string']
|
||||
if "connection_string" in kwargs:
|
||||
connection_string = kwargs["connection_string"]
|
||||
else:
|
||||
connection_string = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
|
||||
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
|
||||
|
||||
if connection_string:
|
||||
self.blob_service_client = BlobServiceClient.from_connection_string(
|
||||
connection_string
|
||||
)
|
||||
self.blob_service_client = BlobServiceClient.from_connection_string(connection_string)
|
||||
# Extract account name from connection string
|
||||
self.account_name = None
|
||||
self.account_key = None
|
||||
for part in connection_string.split(';'):
|
||||
if part.startswith('AccountName='):
|
||||
self.account_name = part.split('=', 1)[1]
|
||||
elif part.startswith('AccountKey='):
|
||||
self.account_key = part.split('=', 1)[1]
|
||||
for part in connection_string.split(";"):
|
||||
if part.startswith("AccountName="):
|
||||
self.account_name = part.split("=", 1)[1]
|
||||
elif part.startswith("AccountKey="):
|
||||
self.account_key = part.split("=", 1)[1]
|
||||
else:
|
||||
account_name = kwargs.get(
|
||||
'account_name',
|
||||
os.getenv('AZURE_STORAGE_ACCOUNT_NAME')
|
||||
)
|
||||
account_key = kwargs.get(
|
||||
'account_key',
|
||||
os.getenv('AZURE_STORAGE_ACCOUNT_KEY')
|
||||
)
|
||||
account_name = kwargs.get("account_name", os.getenv("AZURE_STORAGE_ACCOUNT_NAME"))
|
||||
account_key = kwargs.get("account_key", os.getenv("AZURE_STORAGE_ACCOUNT_KEY"))
|
||||
|
||||
if not account_name or not account_key:
|
||||
raise ValueError(
|
||||
@@ -108,13 +101,10 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
self.account_key = account_key
|
||||
account_url = f"https://{account_name}.blob.core.windows.net"
|
||||
self.blob_service_client = BlobServiceClient(
|
||||
account_url=account_url,
|
||||
credential=account_key
|
||||
account_url=account_url, credential=account_key
|
||||
)
|
||||
|
||||
self.container_client = self.blob_service_client.get_container_client(
|
||||
self.container_name
|
||||
)
|
||||
self.container_client = self.blob_service_client.get_container_client(self.container_name)
|
||||
|
||||
def upload_file(
|
||||
self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None
|
||||
@@ -128,11 +118,7 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
blob_client = self.container_client.get_blob_client(remote_path)
|
||||
|
||||
with open(local_file, "rb") as data:
|
||||
blob_client.upload_blob(
|
||||
data,
|
||||
overwrite=True,
|
||||
metadata=metadata
|
||||
)
|
||||
blob_client.upload_blob(data, overwrite=True, metadata=metadata)
|
||||
|
||||
return f"https://{self.account_name}.blob.core.windows.net/{self.container_name}/{remote_path}"
|
||||
except Exception as e:
|
||||
@@ -164,25 +150,26 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
except Exception as e:
|
||||
raise Exception(f"Azure deletion failed: {e}") from e
|
||||
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> list[StorageObject]:
|
||||
def list_files(self, prefix: str = "", max_results: int = 1000) -> list[StorageObject]:
|
||||
"""List files in Azure container."""
|
||||
try:
|
||||
blobs = self.container_client.list_blobs(
|
||||
name_starts_with=prefix,
|
||||
results_per_page=max_results
|
||||
name_starts_with=prefix, results_per_page=max_results
|
||||
)
|
||||
|
||||
files = []
|
||||
for blob in blobs:
|
||||
files.append(StorageObject(
|
||||
key=blob.name,
|
||||
size=blob.size,
|
||||
last_modified=blob.last_modified.isoformat() if blob.last_modified else None,
|
||||
etag=blob.etag,
|
||||
metadata=blob.metadata
|
||||
))
|
||||
files.append(
|
||||
StorageObject(
|
||||
key=blob.name,
|
||||
size=blob.size,
|
||||
last_modified=blob.last_modified.isoformat()
|
||||
if blob.last_modified
|
||||
else None,
|
||||
etag=blob.etag,
|
||||
metadata=blob.metadata,
|
||||
)
|
||||
)
|
||||
|
||||
return files
|
||||
except Exception as e:
|
||||
@@ -205,9 +192,7 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
raise FileNotFoundError(f"Remote file not found: {remote_path}")
|
||||
|
||||
if not self.account_name or not self.account_key:
|
||||
raise ValueError(
|
||||
"Account name and key are required for SAS URL generation"
|
||||
)
|
||||
raise ValueError("Account name and key are required for SAS URL generation")
|
||||
|
||||
sas_token = generate_blob_sas(
|
||||
account_name=self.account_name,
|
||||
@@ -215,7 +200,7 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
blob_name=remote_path,
|
||||
account_key=self.account_key,
|
||||
permission=BlobSasPermissions(read=True),
|
||||
expiry=datetime.utcnow() + timedelta(seconds=expires_in)
|
||||
expiry=datetime.utcnow() + timedelta(seconds=expires_in),
|
||||
)
|
||||
|
||||
return f"{blob_client.url}?{sas_token}"
|
||||
@@ -239,12 +224,13 @@ class AzureStorageAdaptor(BaseStorageAdaptor):
|
||||
|
||||
# Wait for copy to complete
|
||||
properties = dest_blob.get_blob_properties()
|
||||
while properties.copy.status == 'pending':
|
||||
while properties.copy.status == "pending":
|
||||
import time
|
||||
|
||||
time.sleep(0.1)
|
||||
properties = dest_blob.get_blob_properties()
|
||||
|
||||
if properties.copy.status != 'success':
|
||||
if properties.copy.status != "success":
|
||||
raise Exception(f"Copy failed with status: {properties.copy.status}")
|
||||
|
||||
except FileNotFoundError:
|
||||
|
||||
@@ -95,9 +95,7 @@ class BaseStorageAdaptor(ABC):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> list[StorageObject]:
|
||||
def list_files(self, prefix: str = "", max_results: int = 1000) -> list[StorageObject]:
|
||||
"""
|
||||
List files in cloud storage.
|
||||
|
||||
@@ -191,9 +189,7 @@ class BaseStorageAdaptor(ABC):
|
||||
|
||||
return uploaded_files
|
||||
|
||||
def download_directory(
|
||||
self, remote_prefix: str, local_dir: str
|
||||
) -> list[str]:
|
||||
def download_directory(self, remote_prefix: str, local_dir: str) -> list[str]:
|
||||
"""
|
||||
Download directory from cloud storage.
|
||||
|
||||
@@ -245,9 +241,7 @@ class BaseStorageAdaptor(ABC):
|
||||
raise FileNotFoundError(f"File not found: {remote_path}")
|
||||
return files[0].size
|
||||
|
||||
def copy_file(
|
||||
self, source_path: str, dest_path: str
|
||||
) -> None:
|
||||
def copy_file(self, source_path: str, dest_path: str) -> None:
|
||||
"""
|
||||
Copy file within cloud storage.
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ from datetime import timedelta
|
||||
try:
|
||||
from google.cloud import storage
|
||||
from google.cloud.exceptions import NotFound
|
||||
|
||||
GCS_AVAILABLE = True
|
||||
except ImportError:
|
||||
GCS_AVAILABLE = False
|
||||
@@ -63,19 +64,19 @@ class GCSStorageAdaptor(BaseStorageAdaptor):
|
||||
"Install with: pip install google-cloud-storage"
|
||||
)
|
||||
|
||||
if 'bucket' not in kwargs:
|
||||
if "bucket" not in kwargs:
|
||||
raise ValueError("bucket parameter is required for GCS storage")
|
||||
|
||||
self.bucket_name = kwargs['bucket']
|
||||
self.project = kwargs.get('project', os.getenv('GOOGLE_CLOUD_PROJECT'))
|
||||
self.bucket_name = kwargs["bucket"]
|
||||
self.project = kwargs.get("project", os.getenv("GOOGLE_CLOUD_PROJECT"))
|
||||
|
||||
# Initialize GCS client
|
||||
client_kwargs = {}
|
||||
if self.project:
|
||||
client_kwargs['project'] = self.project
|
||||
client_kwargs["project"] = self.project
|
||||
|
||||
if 'credentials_path' in kwargs:
|
||||
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = kwargs['credentials_path']
|
||||
if "credentials_path" in kwargs:
|
||||
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = kwargs["credentials_path"]
|
||||
|
||||
self.storage_client = storage.Client(**client_kwargs)
|
||||
self.bucket = self.storage_client.bucket(self.bucket_name)
|
||||
@@ -122,26 +123,24 @@ class GCSStorageAdaptor(BaseStorageAdaptor):
|
||||
except Exception as e:
|
||||
raise Exception(f"GCS deletion failed: {e}") from e
|
||||
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> list[StorageObject]:
|
||||
def list_files(self, prefix: str = "", max_results: int = 1000) -> list[StorageObject]:
|
||||
"""List files in GCS bucket."""
|
||||
try:
|
||||
blobs = self.storage_client.list_blobs(
|
||||
self.bucket_name,
|
||||
prefix=prefix,
|
||||
max_results=max_results
|
||||
self.bucket_name, prefix=prefix, max_results=max_results
|
||||
)
|
||||
|
||||
files = []
|
||||
for blob in blobs:
|
||||
files.append(StorageObject(
|
||||
key=blob.name,
|
||||
size=blob.size,
|
||||
last_modified=blob.updated.isoformat() if blob.updated else None,
|
||||
etag=blob.etag,
|
||||
metadata=blob.metadata
|
||||
))
|
||||
files.append(
|
||||
StorageObject(
|
||||
key=blob.name,
|
||||
size=blob.size,
|
||||
last_modified=blob.updated.isoformat() if blob.updated else None,
|
||||
etag=blob.etag,
|
||||
metadata=blob.metadata,
|
||||
)
|
||||
)
|
||||
|
||||
return files
|
||||
except Exception as e:
|
||||
@@ -164,9 +163,7 @@ class GCSStorageAdaptor(BaseStorageAdaptor):
|
||||
raise FileNotFoundError(f"Remote file not found: {remote_path}")
|
||||
|
||||
url = blob.generate_signed_url(
|
||||
version="v4",
|
||||
expiration=timedelta(seconds=expires_in),
|
||||
method="GET"
|
||||
version="v4", expiration=timedelta(seconds=expires_in), method="GET"
|
||||
)
|
||||
return url
|
||||
except FileNotFoundError:
|
||||
@@ -182,11 +179,7 @@ class GCSStorageAdaptor(BaseStorageAdaptor):
|
||||
if not source_blob.exists():
|
||||
raise FileNotFoundError(f"Source file not found: {source_path}")
|
||||
|
||||
self.bucket.copy_blob(
|
||||
source_blob,
|
||||
self.bucket,
|
||||
dest_path
|
||||
)
|
||||
self.bucket.copy_blob(source_blob, self.bucket, dest_path)
|
||||
except FileNotFoundError:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
||||
@@ -8,6 +8,7 @@ from pathlib import Path
|
||||
try:
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
BOTO3_AVAILABLE = True
|
||||
except ImportError:
|
||||
BOTO3_AVAILABLE = False
|
||||
@@ -63,33 +64,30 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if not BOTO3_AVAILABLE:
|
||||
raise ImportError(
|
||||
"boto3 is required for S3 storage. "
|
||||
"Install with: pip install boto3"
|
||||
)
|
||||
raise ImportError("boto3 is required for S3 storage. Install with: pip install boto3")
|
||||
|
||||
if 'bucket' not in kwargs:
|
||||
if "bucket" not in kwargs:
|
||||
raise ValueError("bucket parameter is required for S3 storage")
|
||||
|
||||
self.bucket = kwargs['bucket']
|
||||
self.region = kwargs.get('region', os.getenv('AWS_DEFAULT_REGION', 'us-east-1'))
|
||||
self.bucket = kwargs["bucket"]
|
||||
self.region = kwargs.get("region", os.getenv("AWS_DEFAULT_REGION", "us-east-1"))
|
||||
|
||||
# Initialize S3 client
|
||||
client_kwargs = {
|
||||
'region_name': self.region,
|
||||
"region_name": self.region,
|
||||
}
|
||||
|
||||
if 'endpoint_url' in kwargs:
|
||||
client_kwargs['endpoint_url'] = kwargs['endpoint_url']
|
||||
if "endpoint_url" in kwargs:
|
||||
client_kwargs["endpoint_url"] = kwargs["endpoint_url"]
|
||||
|
||||
if 'aws_access_key_id' in kwargs:
|
||||
client_kwargs['aws_access_key_id'] = kwargs['aws_access_key_id']
|
||||
if "aws_access_key_id" in kwargs:
|
||||
client_kwargs["aws_access_key_id"] = kwargs["aws_access_key_id"]
|
||||
|
||||
if 'aws_secret_access_key' in kwargs:
|
||||
client_kwargs['aws_secret_access_key'] = kwargs['aws_secret_access_key']
|
||||
if "aws_secret_access_key" in kwargs:
|
||||
client_kwargs["aws_secret_access_key"] = kwargs["aws_secret_access_key"]
|
||||
|
||||
self.s3_client = boto3.client('s3', **client_kwargs)
|
||||
self.s3_resource = boto3.resource('s3', **client_kwargs)
|
||||
self.s3_client = boto3.client("s3", **client_kwargs)
|
||||
self.s3_resource = boto3.resource("s3", **client_kwargs)
|
||||
|
||||
def upload_file(
|
||||
self, local_path: str, remote_path: str, metadata: dict[str, str] | None = None
|
||||
@@ -101,14 +99,14 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
|
||||
extra_args = {}
|
||||
if metadata:
|
||||
extra_args['Metadata'] = metadata
|
||||
extra_args["Metadata"] = metadata
|
||||
|
||||
try:
|
||||
self.s3_client.upload_file(
|
||||
str(local_file),
|
||||
self.bucket,
|
||||
remote_path,
|
||||
ExtraArgs=extra_args if extra_args else None
|
||||
ExtraArgs=extra_args if extra_args else None,
|
||||
)
|
||||
return f"s3://{self.bucket}/{remote_path}"
|
||||
except ClientError as e:
|
||||
@@ -120,50 +118,41 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
local_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
self.s3_client.download_file(
|
||||
self.bucket,
|
||||
remote_path,
|
||||
str(local_file)
|
||||
)
|
||||
self.s3_client.download_file(self.bucket, remote_path, str(local_file))
|
||||
except ClientError as e:
|
||||
if e.response['Error']['Code'] == '404':
|
||||
if e.response["Error"]["Code"] == "404":
|
||||
raise FileNotFoundError(f"Remote file not found: {remote_path}") from e
|
||||
raise Exception(f"S3 download failed: {e}") from e
|
||||
|
||||
def delete_file(self, remote_path: str) -> None:
|
||||
"""Delete file from S3."""
|
||||
try:
|
||||
self.s3_client.delete_object(
|
||||
Bucket=self.bucket,
|
||||
Key=remote_path
|
||||
)
|
||||
self.s3_client.delete_object(Bucket=self.bucket, Key=remote_path)
|
||||
except ClientError as e:
|
||||
raise Exception(f"S3 deletion failed: {e}") from e
|
||||
|
||||
def list_files(
|
||||
self, prefix: str = "", max_results: int = 1000
|
||||
) -> list[StorageObject]:
|
||||
def list_files(self, prefix: str = "", max_results: int = 1000) -> list[StorageObject]:
|
||||
"""List files in S3 bucket."""
|
||||
try:
|
||||
paginator = self.s3_client.get_paginator('list_objects_v2')
|
||||
paginator = self.s3_client.get_paginator("list_objects_v2")
|
||||
page_iterator = paginator.paginate(
|
||||
Bucket=self.bucket,
|
||||
Prefix=prefix,
|
||||
PaginationConfig={'MaxItems': max_results}
|
||||
Bucket=self.bucket, Prefix=prefix, PaginationConfig={"MaxItems": max_results}
|
||||
)
|
||||
|
||||
files = []
|
||||
for page in page_iterator:
|
||||
if 'Contents' not in page:
|
||||
if "Contents" not in page:
|
||||
continue
|
||||
|
||||
for obj in page['Contents']:
|
||||
files.append(StorageObject(
|
||||
key=obj['Key'],
|
||||
size=obj['Size'],
|
||||
last_modified=obj['LastModified'].isoformat(),
|
||||
etag=obj.get('ETag', '').strip('"')
|
||||
))
|
||||
for obj in page["Contents"]:
|
||||
files.append(
|
||||
StorageObject(
|
||||
key=obj["Key"],
|
||||
size=obj["Size"],
|
||||
last_modified=obj["LastModified"].isoformat(),
|
||||
etag=obj.get("ETag", "").strip('"'),
|
||||
)
|
||||
)
|
||||
|
||||
return files
|
||||
except ClientError as e:
|
||||
@@ -172,13 +161,10 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
def file_exists(self, remote_path: str) -> bool:
|
||||
"""Check if file exists in S3."""
|
||||
try:
|
||||
self.s3_client.head_object(
|
||||
Bucket=self.bucket,
|
||||
Key=remote_path
|
||||
)
|
||||
self.s3_client.head_object(Bucket=self.bucket, Key=remote_path)
|
||||
return True
|
||||
except ClientError as e:
|
||||
if e.response['Error']['Code'] == '404':
|
||||
if e.response["Error"]["Code"] == "404":
|
||||
return False
|
||||
raise Exception(f"S3 head_object failed: {e}") from e
|
||||
|
||||
@@ -186,12 +172,9 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
"""Generate presigned URL for S3 object."""
|
||||
try:
|
||||
url = self.s3_client.generate_presigned_url(
|
||||
'get_object',
|
||||
Params={
|
||||
'Bucket': self.bucket,
|
||||
'Key': remote_path
|
||||
},
|
||||
ExpiresIn=expires_in
|
||||
"get_object",
|
||||
Params={"Bucket": self.bucket, "Key": remote_path},
|
||||
ExpiresIn=expires_in,
|
||||
)
|
||||
return url
|
||||
except ClientError as e:
|
||||
@@ -200,16 +183,9 @@ class S3StorageAdaptor(BaseStorageAdaptor):
|
||||
def copy_file(self, source_path: str, dest_path: str) -> None:
|
||||
"""Copy file within S3 bucket (server-side copy)."""
|
||||
try:
|
||||
copy_source = {
|
||||
'Bucket': self.bucket,
|
||||
'Key': source_path
|
||||
}
|
||||
self.s3_client.copy_object(
|
||||
CopySource=copy_source,
|
||||
Bucket=self.bucket,
|
||||
Key=dest_path
|
||||
)
|
||||
copy_source = {"Bucket": self.bucket, "Key": source_path}
|
||||
self.s3_client.copy_object(CopySource=copy_source, Bucket=self.bucket, Key=dest_path)
|
||||
except ClientError as e:
|
||||
if e.response['Error']['Code'] == '404':
|
||||
if e.response["Error"]["Code"] == "404":
|
||||
raise FileNotFoundError(f"Source file not found: {source_path}") from e
|
||||
raise Exception(f"S3 copy failed: {e}") from e
|
||||
|
||||
Reference in New Issue
Block a user